Merge remote-tracking branch 'rschu1ze/master' into qatzstd_main

This commit is contained in:
Robert Schulze 2024-01-09 20:36:02 +00:00
commit ced9e93ac6
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
222 changed files with 5446 additions and 1553 deletions

.gitmodules vendored
View File

@ -366,3 +366,6 @@
[submodule "contrib/sqids-cpp"]
path = contrib/sqids-cpp
url = https://github.com/sqids/sqids-cpp.git
[submodule "contrib/idna"]
path = contrib/idna
url = https://github.com/ada-url/idna.git

View File

@ -33,7 +33,7 @@ curl https://clickhouse.com/ | sh
## Upcoming Events
Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com.
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"

View File

@ -154,6 +154,7 @@ add_contrib (libpqxx-cmake libpqxx)
add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (idna-cmake idna)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)
add_contrib (sqids-cpp-cmake sqids-cpp)

contrib/idna vendored Submodule

@ -0,0 +1 @@
Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667

View File

@ -0,0 +1,24 @@
option(ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES})
if (NOT ENABLE_IDNA)
message (STATUS "Not using idna")
return()
endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna")
set (SRCS
"${LIBRARY_DIR}/src/idna.cpp"
"${LIBRARY_DIR}/src/mapping.cpp"
"${LIBRARY_DIR}/src/mapping_tables.cpp"
"${LIBRARY_DIR}/src/normalization.cpp"
"${LIBRARY_DIR}/src/normalization_tables.cpp"
"${LIBRARY_DIR}/src/punycode.cpp"
"${LIBRARY_DIR}/src/to_ascii.cpp"
"${LIBRARY_DIR}/src/to_unicode.cpp"
"${LIBRARY_DIR}/src/unicode_transcoding.cpp"
"${LIBRARY_DIR}/src/validity.cpp"
)
add_library (_idna ${SRCS})
target_include_directories(_idna PUBLIC "${LIBRARY_DIR}/include")
add_library (ch_contrib::idna ALIAS _idna)

View File

@ -44,6 +44,9 @@ if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TR
# It is not needed, we will explicitly create tables on s3.
# We do not have stateful tests with S3 storage running in the public repository, but this is needed for another repository.
rm /etc/clickhouse-server/config.d/s3_storage_policy_for_merge_tree_by_default.xml
rm /etc/clickhouse-server/config.d/storage_metadata_with_full_object_key.xml
rm /etc/clickhouse-server/config.d/s3_storage_policy_with_template_object_key.xml
fi
function start()

View File

@ -193,6 +193,7 @@ stop
# Let's enable S3 storage by default
export USE_S3_STORAGE_FOR_MERGE_TREE=1
export RANDOMIZE_OBJECT_KEY_TYPE=1
export ZOOKEEPER_FAULT_INJECTION=1
configure

View File

@ -11,7 +11,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec
``` sql
CREATE TABLE s3_queue_engine_table (name String, value UInt32)
ENGINE = S3Queue(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression])
ENGINE = S3Queue(path, [NOSIGN, | aws_access_key_id, aws_secret_access_key,] format, [compression])
[SETTINGS]
[mode = 'unordered',]
[after_processing = 'keep',]

View File

@ -1143,6 +1143,8 @@ Optional parameters:
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting the request-per-second limit. By default (`0` value) it is equal to `s3_max_get_rps`.
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `key_template` — Defines the format with which object keys are generated. By default, ClickHouse takes the `root path` from the `endpoint` option and adds a randomly generated suffix. That suffix is a directory with 3 random symbols and a file name with 29 random symbols. With this option you have full control over how the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of the object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only a subset of the syntax is supported; check whether your preferred format is supported before using this option. The disk isn't initialized if ClickHouse is unable to generate a key from the value of `key_template`. It requires the enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in the `endpoint` option. It requires the definition of the option `key_compatibility_prefix`.
- `key_compatibility_prefix` — This option is required when the option `key_template` is in use. To be able to read object keys that were stored in metadata files with a metadata version lower than `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here. A configuration sketch using both options follows this list.
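A minimal configuration sketch, assuming a hypothetical disk named `s3_templated` and an illustrative endpoint (the concrete pattern and prefix are examples, not taken from this commit):

```xml
<clickhouse>
    <storage_configuration>
        <disks>
            <s3_templated>
                <type>s3</type>
                <!-- No root path after the bucket: key_template forbids it -->
                <endpoint>https://s3.us-east-1.amazonaws.com/my-bucket/</endpoint>
                <!-- Random prefix, constant middle, random suffix -->
                <key_template>[a-z]{3}-prefix/constant-part/[a-z]{29}</key_template>
                <!-- Previous root path, so older metadata stays readable -->
                <key_compatibility_prefix>data/</key_compatibility_prefix>
            </s3_templated>
        </disks>
    </storage_configuration>
</clickhouse>
```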
### Configuring the cache

View File

@ -4795,6 +4795,23 @@ Possible values:
Type: Bool
## output_format_compression_level
Default compression level if query output is compressed. The setting is applied when a `SELECT` query has `INTO OUTFILE` or when writing to the table functions `file`, `url`, `hdfs`, `s3`, or `azureBlobStorage`.
Possible values: from `1` to `22`
Default: `3`
## output_format_compression_zstd_window_log
Can be used when the output compression method is `zstd`. If greater than `0`, this setting explicitly sets the compression window size (a power of `2`) and enables long-range mode for zstd compression. This can help to achieve a better compression ratio.
Possible values: non-negative numbers. Note that if the value is too small or too large, the zstd library will throw an exception. Typical values are from `20` (window size = `1MB`) to `30` (window size = `1GB`).
Default: `0`
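A hedged example combining both settings (the file name and values are arbitrary); the explicit `COMPRESSION 'zstd'` clause makes the window-log setting applicable:

```sql
SELECT number
FROM numbers(1000000)
INTO OUTFILE 'numbers.txt'
COMPRESSION 'zstd'
SETTINGS output_format_compression_level = 5, output_format_compression_zstd_window_log = 22;
```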
## rewrite_count_distinct_if_with_count_distinct_implementation
Allows you to rewrite `countDistinctIf` with the [count_distinct_implementation](#count_distinct_implementation) setting.
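A hedged sketch of the effect (the table and columns are hypothetical): with the setting enabled, the query below behaves as if `countDistinctIf` were replaced by the `If`-combinator form of the function selected by `count_distinct_implementation`, e.g. `uniqExactIf`:

```sql
SELECT countDistinctIf(user_id, event = 'click')
FROM events
SETTINGS rewrite_count_distinct_if_with_count_distinct_implementation = 1;
```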

View File

@ -0,0 +1,14 @@
---
slug: /en/operations/system-tables/dropped_tables_parts
---
# dropped_tables_parts {#system_tables-dropped_tables_parts}
Contains information about parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) dropped tables from [system.dropped_tables](./dropped_tables.md).
The schema of this table is the same as that of [system.parts](./parts.md).
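A hedged example query (the selected columns exist in [system.parts](./parts.md), whose schema this table shares):

```sql
SELECT database, table, name, rows
FROM system.dropped_tables_parts
LIMIT 5;
```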
**See Also**
- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
- [system.parts](./parts.md)
- [system.dropped_tables](./dropped_tables.md)

View File

@ -14,6 +14,11 @@ Columns:
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`.
- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
- `changeable_without_restart` ([Enum8](../../sql-reference/data-types/enum.md)) — Whether the setting can be changed at server runtime. Values:
    - `'No'`
- `'IncreaseOnly'`
- `'DecreaseOnly'`
- `'Yes'`
- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) - Shows whether a setting is obsolete.
**Example**
@ -27,22 +32,21 @@ WHERE name LIKE '%thread_pool%'
```
``` text
┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─is_obsolete─┐
│ max_thread_pool_size │ 10000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ 0 │
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ 0 │
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ 0 │
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ 0 │
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ 0 │
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ 0 │
│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ 0 │
│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ 0 │
│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ 0 │
│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ 0 │
│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ 0 │
│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. │ UInt64 │ 0 │
└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴─────────────┘
┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─changeable_without_restart─┬─is_obsolete─┐
│ max_thread_pool_size │ 10000 │ 10000 │ 0 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ No │ 0 │
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ No │ 0 │
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ No │ 0 │
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ No │ 0 │
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ No │ 0 │
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ No │ 0 │
│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ No │ 0 │
│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ No │ 0 │
│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ No │ 0 │
│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ No │ 0 │
│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ No │ 0 │
│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. │ UInt64 │ No │ 0 │
└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴────────────────────────────┴─────────────┘
```
Using `WHERE changed` can be useful, for example, when you want to check whether settings in configuration files are loaded correctly and are in use.

View File

@ -11,6 +11,8 @@ Keys:
- `--query` — Format queries of any length and complexity.
- `--hilite` — Add syntax highlight with ANSI terminal escape sequences.
- `--oneline` — Format in single line.
- `--max_line_length` — Format queries whose length is less than the specified value on a single line (see the example after this list).
- `--comments` — Keep comments in the output.
- `--quiet` or `-q` — Just check syntax, no output on success.
- `--multiquery` or `-n` — Allow multiple queries in the same file.
- `--obfuscate` — Obfuscate instead of formatting.
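A hedged invocation combining the new keys (the input file name is arbitrary):

```bash
clickhouse-format --multiquery --comments --max_line_length 120 < queries.sql
```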

View File

A client application to interact with clickhouse-keeper using its native protocol.
## Example {#clickhouse-keeper-client-example}
```bash
./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
./clickhouse-keeper-client -h localhost -p 9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
Connected to ZooKeeper at [::1]:9181 with session_id 137
/ :) ls
keeper foo bar

View File

@ -1383,6 +1383,148 @@ Result:
└──────────────────┘
```
## punycodeEncode
Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) representation of a string.
The string must be UTF8-encoded, otherwise the behavior is undefined.
**Syntax**
``` sql
punycodeEncode(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- A Punycode representation of the input value. [String](../data-types/string.md)
**Example**
``` sql
select punycodeEncode('München');
```
Result:
```result
┌─punycodeEncode('München')─┐
│ Mnchen-3ya │
└───────────────────────────┘
```
## punycodeDecode
Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string.
If no valid Punycode-encoded string is given, an exception is thrown.
**Syntax**
``` sql
punycodeDecode(val)
```
**Arguments**
- `val` - Punycode-encoded string. [String](../data-types/string.md)
**Returned value**
- The plaintext of the input value. [String](../data-types/string.md)
**Example**
``` sql
select punycodeDecode('Mnchen-3ya');
```
Result:
```result
┌─punycodeDecode('Mnchen-3ya')─┐
│ München │
└──────────────────────────────┘
```
## tryPunycodeDecode
Like `punycodeDecode` but returns an empty string if no valid Punycode-encoded string is given.
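A hedged illustration of the fallback (the second argument is deliberately not valid Punycode):

```sql
SELECT tryPunycodeDecode('Mnchen-3ya') AS ok, tryPunycodeDecode('Mnchen-3y@') AS fail;
```

Per the description above, `ok` returns `München` while `fail` returns an empty string.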
## idnaEncode
Returns the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
The input string must be UTF8-encoded and translatable to an ASCII string, otherwise an exception is thrown.
Note: No percent decoding or trimming of tabs, spaces or control characters is performed.
**Syntax**
```sql
idnaEncode(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- An ASCII representation according to the IDNA mechanism of the input value. [String](../data-types/string.md)
**Example**
``` sql
select idnaEncode('straße.münchen.de');
```
Result:
```result
┌─idnaEncode('straße.münchen.de')─────┐
│ xn--strae-oqa.xn--mnchen-3ya.de │
└─────────────────────────────────────┘
```
## tryIdnaEncode
Like `idnaEncode` but returns an empty string in case of an error instead of throwing an exception.
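A hedged illustration (expected output inferred from the `idnaEncode` example above; untranslatable input would yield an empty string instead of an exception):

```sql
SELECT tryIdnaEncode('münchen.de');
```

Result:

```result
┌─tryIdnaEncode('münchen.de')─┐
│ xn--mnchen-3ya.de           │
└─────────────────────────────┘
```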
## idnaDecode
Returns the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
In case of an error (e.g. because the input is invalid), the input string is returned.
Note that repeated application of `idnaEncode()` and `idnaDecode()` does not necessarily return the original string due to case normalization.
**Syntax**
```sql
idnaDecode(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- A Unicode (UTF-8) representation according to the IDNA mechanism of the input value. [String](../data-types/string.md)
**Example**
``` sql
select idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de');
```
Result:
```result
┌─idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de')─┐
│ straße.münchen.de │
└───────────────────────────────────────────────┘
```
## byteHammingDistance
Calculates the [Hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.

View File

@ -57,3 +57,56 @@ Result:
│ 6 │
└─────────┘
```
## seriesDecomposeSTL
Decomposes a time series using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a seasonal, a trend, and a residual component.
**Syntax**
``` sql
seriesDecomposeSTL(series, period);
```
**Arguments**
- `series` - An array of numeric values
- `period` - A positive integer
The number of data points in `series` should be at least twice the value of `period`.
**Returned value**
- An array of three arrays, where the first array contains the seasonal component, the second the trend, and the third the residual component.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT seriesDecomposeSTL([10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34], 3) AS print_0;
```
Result:
``` text
┌───────────print_0──────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ [[
-13.529999, -3.1799996, 16.71, -13.53, -3.1799996, 16.71, -13.53, -3.1799996,
16.71, -13.530001, -3.18, 16.710001, -13.530001, -3.1800003, 16.710001, -13.530001,
-3.1800003, 16.710001, -13.530001, -3.1799994, 16.71, -13.529999, -3.1799994, 16.709997
],
[
23.63, 23.63, 23.630003, 23.630001, 23.630001, 23.630001, 23.630001, 23.630001,
23.630001, 23.630001, 23.630001, 23.63, 23.630001, 23.630001, 23.63, 23.630001,
23.630001, 23.63, 23.630001, 23.630001, 23.630001, 23.630001, 23.630001, 23.630003
],
[
0, 0.0000019073486, -0.0000019073486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0000019073486, 0,
0
]] │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

View File

@ -1559,7 +1559,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
QueryPipeline input;
QueryPipeline output;
{
BlockIO io_insert = InterpreterFactory::get(query_insert_ast, context_insert)->execute();
BlockIO io_insert = InterpreterFactory::instance().get(query_insert_ast, context_insert)->execute();
InterpreterSelectWithUnionQuery select(query_select_ast, context_select, SelectQueryOptions{});
QueryPlan plan;
@ -1944,7 +1944,7 @@ bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts,
auto local_context = Context::createCopy(context);
local_context->setSettings(task_cluster->settings_pull);
auto pipeline = InterpreterFactory::get(query_ast, local_context)->execute().pipeline;
auto pipeline = InterpreterFactory::instance().get(query_ast, local_context)->execute().pipeline;
PullingPipelineExecutor executor(pipeline);
Block block;
executor.pull(block);
@ -1989,7 +1989,7 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi
auto local_context = Context::createCopy(context);
local_context->setSettings(task_cluster->settings_pull);
auto pipeline = InterpreterFactory::get(query_ast, local_context)->execute().pipeline;
auto pipeline = InterpreterFactory::instance().get(query_ast, local_context)->execute().pipeline;
PullingPipelineExecutor executor(pipeline);
Block result;
executor.pull(result);

View File

@ -4,6 +4,7 @@
#include <Common/TerminalSize.h>
#include <Databases/registerDatabases.h>
#include <IO/ConnectionTimeouts.h>
#include <Interpreters/registerInterpreters.h>
#include <Formats/registerFormats.h>
#include <Common/scope_guard_safe.h>
#include <unistd.h>
@ -157,6 +158,7 @@ void ClusterCopierApp::mainImpl()
context->setApplicationType(Context::ApplicationType::LOCAL);
context->setPath(process_path + "/");
registerInterpreters();
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();

View File

@ -3,16 +3,19 @@
#include <string_view>
#include <boost/program_options.hpp>
#include <IO/copyData.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromOStream.h>
#include <Interpreters/registerInterpreters.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/obfuscateQueries.h>
#include <Parsers/parseQuery.h>
#include <Common/ErrorCodes.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/TerminalSize.h>
#include <Interpreters/Context.h>
@ -29,22 +32,49 @@
#include <DataTypes/DataTypeFactory.h>
#include <Formats/FormatFactory.h>
#include <Formats/registerFormats.h>
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
namespace DB::ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace
{
void skipSpacesAndComments(const char*& pos, const char* end, bool print_comments)
{
do
{
/// skip spaces to avoid throwing an exception after the last query
while (pos != end && std::isspace(*pos))
++pos;
const char * comment_begin = pos;
/// skip a comment after the last query to avoid throwing an exception
if (end - pos > 2 && *pos == '-' && *(pos + 1) == '-')
{
pos += 2;
/// skip until the end of the line
while (pos != end && *pos != '\n')
++pos;
if (print_comments)
std::cout << std::string_view(comment_begin, pos - comment_begin) << "\n";
}
/// otherwise, the next query needs to be parsed
else
break;
} while (pos != end);
}
}
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
extern const char * auto_time_zones[];
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_FORMAT_INSERT_QUERY_WITH_DATA;
}
}
int mainEntryClickHouseFormat(int argc, char ** argv)
{
using namespace DB;
@ -55,8 +85,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
desc.add_options()
("query", po::value<std::string>(), "query to format")
("help,h", "produce help message")
("comments", "keep comments in the output")
("hilite", "add syntax highlight with ANSI terminal escape sequences")
("oneline", "format in single line")
("max_line_length", po::value<size_t>()->default_value(0), "format in single line queries with length less than specified")
("quiet,q", "just check syntax, no output on success")
("multiquery,n", "allow multiple queries in the same file")
("obfuscate", "obfuscate instead of formatting")
@ -88,6 +120,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
bool oneline = options.count("oneline");
bool quiet = options.count("quiet");
bool multiple = options.count("multiquery");
bool print_comments = options.count("comments");
size_t max_line_length = options["max_line_length"].as<size_t>();
bool obfuscate = options.count("obfuscate");
bool backslash = options.count("backslash");
bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert");
@ -104,6 +138,19 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
return 2;
}
if (oneline && max_line_length)
{
std::cerr << "Options 'oneline' and 'max_line_length' are mutually exclusive." << std::endl;
return 2;
}
if (max_line_length > 255)
{
std::cerr << "Option 'max_line_length' must be less than 256." << std::endl;
return 2;
}
String query;
if (options.count("query"))
@ -124,10 +171,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
if (options.count("seed"))
{
std::string seed;
hash_func.update(options["seed"].as<std::string>());
}
registerInterpreters();
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
@ -179,30 +226,75 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
{
const char * pos = query.data();
const char * end = pos + query.size();
skipSpacesAndComments(pos, end, print_comments);
ParserQuery parser(end, allow_settings_after_format_in_insert);
do
while (pos != end)
{
size_t approx_query_length = multiple ? find_first_symbols<';'>(pos, end) - pos : end - pos;
ASTPtr res = parseQueryAndMovePosition(
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth);
/// For insert query with data(INSERT INTO ... VALUES ...), that will lead to the formatting failure,
/// we should throw an exception early, and make exception message more readable.
if (const auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
std::unique_ptr<ReadBuffer> insert_query_payload = nullptr;
/// If the query is INSERT ... VALUES, then we will try to parse the data.
if (auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
{
throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA,
"Can't format ASTInsertQuery with data, since data will be lost");
if ("Values" != insert_query->format)
throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Can't format INSERT query with data format '{}'", insert_query->format);
/// Reset format to default to have `INSERT INTO table VALUES` instead of `INSERT INTO table VALUES FORMAT Values`
insert_query->format = {};
/// We assume that data ends with a newline character (same as client does)
const char * this_query_end = find_first_symbols<'\n'>(insert_query->data, end);
insert_query->end = this_query_end;
pos = this_query_end;
insert_query_payload = getReadBufferFromASTInsertQuery(res);
}
if (!quiet)
{
if (!backslash)
{
WriteBufferFromOStream res_buf(std::cout, 4096);
formatAST(*res, res_buf, hilite, oneline);
res_buf.finalize();
if (multiple)
WriteBufferFromOwnString str_buf;
formatAST(*res, str_buf, hilite, oneline || approx_query_length < max_line_length);
if (insert_query_payload)
{
str_buf.write(' ');
copyData(*insert_query_payload, str_buf);
}
String res_string = str_buf.str();
const char * s_pos = res_string.data();
const char * s_end = s_pos + res_string.size();
/// remove trailing spaces
while (s_end > s_pos && isWhitespaceASCIIOneLine(*(s_end - 1)))
--s_end;
WriteBufferFromOStream res_cout(std::cout, 4096);
/// For multiline queries we print ';' on a new line,
/// but for single line queries we print ';' on the same line
bool has_multiple_lines = false;
while (s_pos != s_end)
{
if (*s_pos == '\n')
has_multiple_lines = true;
res_cout.write(*s_pos++);
}
res_cout.finalize();
if (multiple && !insert_query_payload)
{
if (oneline || !has_multiple_lines)
std::cout << ";\n";
else
std::cout << "\n;\n";
}
else if (multiple && insert_query_payload)
/// No need to add ';' because it's already in the insert_query_payload
std::cout << "\n";
std::cout << std::endl;
}
/// add additional '\' at the end of each line;
@ -230,27 +322,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
std::cout << std::endl;
}
}
do
{
/// skip spaces to avoid throw exception after last query
while (pos != end && std::isspace(*pos))
++pos;
/// for skip comment after the last query and to not throw exception
if (end - pos > 2 && *pos == '-' && *(pos + 1) == '-')
{
pos += 2;
/// skip until the end of the line
while (pos != end && *pos != '\n')
++pos;
}
/// need to parse next sql
else
skipSpacesAndComments(pos, end, print_comments);
if (!multiple)
break;
} while (pos != end);
} while (multiple && pos != end);
}
}
}
catch (...)

View File

@ -335,7 +335,7 @@ try
else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"))
{
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
"By default 'keeper.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper.storage_path' in the keeper configuration explicitly",
"By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly",
KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"});
}
else

View File

@ -20,6 +20,7 @@
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/registerInterpreters.h>
#include <base/getFQDNOrHostName.h>
#include <Common/scope_guard_safe.h>
#include <Interpreters/Session.h>
@ -486,6 +487,7 @@ try
Poco::ErrorHandler::set(&error_handler);
}
registerInterpreters();
/// Don't initialize DateLUT
registerFunctions();
registerAggregateFunctions();

View File

@ -58,6 +58,7 @@
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/registerInterpreters.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Access/AccessControl.h>
#include <Storages/StorageReplicatedMergeTree.h>
@ -646,6 +647,7 @@ try
}
#endif
registerInterpreters();
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();

View File

@ -713,11 +713,11 @@
For example, if there are two users A and B, and a row policy is defined only for A, then
if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows.
By default this setting is false for compatibility with earlier access configurations. -->
<users_without_row_policies_can_read_rows>false</users_without_row_policies_can_read_rows>
<users_without_row_policies_can_read_rows>true</users_without_row_policies_can_read_rows>
<!-- By default, for backward compatibility ON CLUSTER queries ignore CLUSTER grant,
however you can change this behaviour by setting this to true -->
<on_cluster_queries_require_cluster_grant>false</on_cluster_queries_require_cluster_grant>
<on_cluster_queries_require_cluster_grant>true</on_cluster_queries_require_cluster_grant>
<!-- By default, for backward compatibility "SELECT * FROM system.<table>" doesn't require any grants and can be executed
by any user. You can change this behaviour by setting this to true.
@ -725,19 +725,19 @@
Exceptions: a few system tables ("tables", "columns", "databases", and some constant tables like "one", "contributors")
are still accessible for everyone; and if there is a SHOW privilege (e.g. "SHOW USERS") granted the corresponding system
table (i.e. "system.users") will be accessible. -->
<select_from_system_db_requires_grant>false</select_from_system_db_requires_grant>
<select_from_system_db_requires_grant>true</select_from_system_db_requires_grant>
<!-- By default, for backward compatibility "SELECT * FROM information_schema.<table>" doesn't require any grants and can be
executed by any user. You can change this behaviour by setting this to true.
If it's set to true then this query requires "GRANT SELECT ON information_schema.<table>" just like as for ordinary tables. -->
<select_from_information_schema_requires_grant>false</select_from_information_schema_requires_grant>
<select_from_information_schema_requires_grant>true</select_from_information_schema_requires_grant>
<!-- By default, for backward compatibility a settings profile constraint for a specific setting inherits every unset field from the
previous profile. You can change this behaviour by setting this to true.
If it's set to true then if settings profile has a constraint for a specific setting, then this constraint completely cancels all
actions of previous constraint (defined in other profiles) for the same specific setting, including fields that are not set by new constraint.
It also enables 'changeable_in_readonly' constraint type -->
<settings_constraints_replace_previous>false</settings_constraints_replace_previous>
<settings_constraints_replace_previous>true</settings_constraints_replace_previous>
<!-- Number of seconds since last access a role is stored in the Role Cache -->
<role_cache_expiration_time_seconds>600</role_cache_expiration_time_seconds>

View File

@ -77,7 +77,6 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti
};
add_column("name", "functions", false, {});
add_column("name", "database_engines", false, {});
add_column("name", "table_engines", false, {});
add_column("name", "formats", false, {});
add_column("name", "table_functions", false, {});

View File

@ -43,6 +43,19 @@ void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, Atomic
}
}
void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel)
{
cancel = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
"Load job '{}' -> {}",
self->name,
getExceptionMessage(dependency->exception(), /* with_stacktrace = */ false)));
}
void ignoreDependencyFailure(const LoadJobPtr &, const LoadJobPtr &, std::exception_ptr &)
{
// No-op
}
LoadStatus LoadJob::status() const
{
std::unique_lock lock{mutex};
@ -96,7 +109,10 @@ size_t LoadJob::canceled(const std::exception_ptr & ptr)
size_t LoadJob::finish()
{
func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` return
// To ensure functions are destructed before `AsyncLoader::wait()` returns
func = {};
dependency_failure = {};
finish_time = std::chrono::system_clock::now();
if (waiters > 0)
finished.notify_all();
@ -327,20 +343,22 @@ void AsyncLoader::schedule(const LoadJobSet & jobs_to_schedule)
if (dep_status == LoadStatus::FAILED || dep_status == LoadStatus::CANCELED)
{
// Dependency on already failed or canceled job -- it's okay. Cancel all dependent jobs.
std::exception_ptr e;
// Dependency on already failed or canceled job -- it's okay.
// Process as usual (may lead to cancel of all dependent jobs).
std::exception_ptr cancel;
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
"Load job '{}' -> {}",
job->name,
getExceptionMessage(dep->exception(), /* with_stacktrace = */ false)));
if (job->dependency_failure)
job->dependency_failure(job, dep, cancel);
});
finish(job, LoadStatus::CANCELED, e, lock);
if (cancel)
{
finish(job, LoadStatus::CANCELED, cancel, lock);
break; // This job is now finished, stop its dependencies processing
}
}
}
}
else
{
// Job was already canceled on previous iteration of this cycle -- skip
@ -515,64 +533,77 @@ String AsyncLoader::checkCycle(const LoadJobPtr & job, LoadJobSet & left, LoadJo
return {};
}
void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock)
void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr reason, std::unique_lock<std::mutex> & lock)
{
chassert(scheduled_jobs.contains(job)); // Job was pending
// Notify waiters
size_t resumed_workers = 0; // Number of workers resumed in the execution pool of the job
if (status == LoadStatus::OK)
{
// Notify waiters
resumed_workers += job->ok();
// Update dependent jobs and enqueue if ready
for (const auto & dep : scheduled_jobs[job].dependent_jobs)
{
chassert(scheduled_jobs.contains(dep)); // All dependent jobs must be pending
Info & dep_info = scheduled_jobs[dep];
dep_info.dependencies_left--;
if (!dep_info.isBlocked())
enqueue(dep_info, dep, lock);
}
}
else
{
// Notify waiters
if (status == LoadStatus::FAILED)
resumed_workers += job->failed(exception_from_job);
resumed_workers = job->ok();
else if (status == LoadStatus::FAILED)
resumed_workers = job->failed(reason);
else if (status == LoadStatus::CANCELED)
resumed_workers += job->canceled(exception_from_job);
resumed_workers = job->canceled(reason);
// Adjust suspended workers count
if (resumed_workers)
{
Pool & pool = pools[job->executionPool()];
pool.suspended_workers -= resumed_workers;
}
Info & info = scheduled_jobs[job];
if (info.isReady())
{
// Job could be in ready queue (on cancel) -- must be dequeued
pools[job->pool_id].ready_queue.erase(info.ready_seqno);
info.ready_seqno = 0;
}
// Recurse into all dependent jobs
// To avoid container modification during recursion (while cleaning dependency graph edges below)
LoadJobSet dependent;
dependent.swap(info.dependent_jobs); // To avoid container modification during recursion
for (const auto & dep : dependent)
dependent.swap(info.dependent_jobs);
// Update dependent jobs
for (const auto & dpt : dependent)
{
if (!scheduled_jobs.contains(dep))
continue; // Job has already been canceled
std::exception_ptr e;
if (auto dpt_info = scheduled_jobs.find(dpt); dpt_info != scheduled_jobs.end())
{
dpt_info->second.dependencies_left--;
if (!dpt_info->second.isBlocked())
enqueue(dpt_info->second, dpt, lock);
if (status != LoadStatus::OK)
{
std::exception_ptr cancel;
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
e = std::make_exception_ptr(
Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
"Load job '{}' -> {}",
dep->name,
getExceptionMessage(exception_from_job, /* with_stacktrace = */ false)));
if (dpt->dependency_failure)
dpt->dependency_failure(dpt, job, cancel);
});
finish(dep, LoadStatus::CANCELED, e, lock);
// Recurse into dependent job if it should be canceled
if (cancel)
finish(dpt, LoadStatus::CANCELED, cancel, lock);
}
}
else
{
// Job has already been canceled. Do not enter twice into the same job during finish recursion.
// This happens in {A<-B; A<-C; B<-D; C<-D} graph for D if A is failed or canceled.
chassert(status == LoadStatus::CANCELED);
}
}
// Clean dependency graph edges pointing to canceled jobs
if (status != LoadStatus::OK)
{
for (const auto & dep : job->dependencies)
{
if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end())
dep_info->second.dependent_jobs.erase(job);
}
}
// Job became finished
scheduled_jobs.erase(job);
@ -582,12 +613,6 @@ void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::excepti
if (log_progress)
logAboutProgress(log, finished_jobs.size() - old_jobs, finished_jobs.size() + scheduled_jobs.size() - old_jobs, stopwatch);
});
if (resumed_workers)
{
Pool & pool = pools[job->executionPool()];
pool.suspended_workers -= resumed_workers;
}
}
void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock)
@ -612,6 +637,9 @@ void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::un
}
job->pool_id.store(new_pool_id);
// TODO(serxa): we should adjust suspended_workers and suspended_waiters here.
// Otherwise suspended_workers will be left inconsistent. Fix it and add a test.
// Scenario: schedule a job A, wait for it from a job B in the same pool, prioritize A
// Recurse into dependencies
for (const auto & dep : job->dependencies)

View File

@ -1,6 +1,7 @@
#pragma once
#include <condition_variable>
#include <concepts>
#include <exception>
#include <memory>
#include <map>
@ -57,12 +58,13 @@ enum class LoadStatus
class LoadJob : private boost::noncopyable
{
public:
template <class Func, class LoadJobSetType>
LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, Func && func_)
template <class LoadJobSetType, class Func, class DFFunc>
LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, DFFunc && dependency_failure_, Func && func_)
: dependencies(std::forward<LoadJobSetType>(dependencies_))
, name(std::move(name_))
, execution_pool_id(pool_id_)
, pool_id(pool_id_)
, dependency_failure(std::forward<DFFunc>(dependency_failure_))
, func(std::forward<Func>(func_))
{}
@ -108,6 +110,14 @@ private:
std::atomic<UInt64> job_id{0};
std::atomic<size_t> execution_pool_id;
std::atomic<size_t> pool_id;
// Handler for failed or canceled dependencies.
// If the job needs to be canceled on `dependency` failure, the function should set `cancel` to a specific reason.
// Note that implementation should be fast and cannot use AsyncLoader, because it is called under `AsyncLoader::mutex`.
// Note that `dependency_failure` is called only on pending jobs.
std::function<void(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel)> dependency_failure;
// Function to be called to execute the job.
std::function<void(AsyncLoader & loader, const LoadJobPtr & self)> func;
mutable std::mutex mutex;
@ -123,35 +133,54 @@ private:
std::atomic<TimePoint> finish_time{TimePoint{}};
};
struct EmptyJobFunc
{
void operator()(AsyncLoader &, const LoadJobPtr &) {}
};
// For LoadJob::dependency_failure. Cancels the job on the first dependency failure or cancel.
void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel);
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, Func && func = EmptyJobFunc())
// For LoadJob::dependency_failure. Never cancels the job due to dependency failure or cancel.
void ignoreDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel);
template <class F> concept LoadJobDependencyFailure = std::invocable<F, const LoadJobPtr &, const LoadJobPtr &, std::exception_ptr &>;
template <class F> concept LoadJobFunc = std::invocable<F, AsyncLoader &, const LoadJobPtr &>;
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func)
{
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), 0, std::forward<Func>(func));
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), 0, std::forward<decltype(dependency_failure)>(dependency_failure), std::forward<decltype(func)>(func));
}
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, Func && func = EmptyJobFunc())
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func)
{
return std::make_shared<LoadJob>(dependencies, std::move(name), 0, std::forward<Func>(func));
return std::make_shared<LoadJob>(dependencies, std::move(name), 0, std::forward<decltype(dependency_failure)>(dependency_failure), std::forward<decltype(func)>(func));
}
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, Func && func = EmptyJobFunc())
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func)
{
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), pool_id, std::forward<Func>(func));
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), pool_id, std::forward<decltype(dependency_failure)>(dependency_failure), std::forward<decltype(func)>(func));
}
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, Func && func = EmptyJobFunc())
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func)
{
return std::make_shared<LoadJob>(dependencies, std::move(name), pool_id, std::forward<Func>(func));
return std::make_shared<LoadJob>(dependencies, std::move(name), pool_id, std::forward<decltype(dependency_failure)>(dependency_failure), std::forward<decltype(func)>(func));
}
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, LoadJobFunc auto && func)
{
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), 0, cancelOnDependencyFailure, std::forward<decltype(func)>(func));
}
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, LoadJobFunc auto && func)
{
return std::make_shared<LoadJob>(dependencies, std::move(name), 0, cancelOnDependencyFailure, std::forward<decltype(func)>(func));
}
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, LoadJobFunc auto && func)
{
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), pool_id, cancelOnDependencyFailure, std::forward<decltype(func)>(func));
}
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, LoadJobFunc auto && func)
{
return std::make_shared<LoadJob>(dependencies, std::move(name), pool_id, cancelOnDependencyFailure, std::forward<decltype(func)>(func));
}
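// A hedged usage sketch (not part of this header): choosing a dependency-failure
// policy explicitly via the overloads above.
//
//     auto primary = makeLoadJob({}, "primary", [] (AsyncLoader &, const LoadJobPtr &) { /* load */ });
//     // Canceled if `primary` fails or is canceled (same as the default policy):
//     auto strict = makeLoadJob({primary}, "strict", cancelOnDependencyFailure, [] (AsyncLoader &, const LoadJobPtr &) {});
//     // Still runs even if `primary` fails:
//     auto lenient = makeLoadJob({primary}, "lenient", ignoreDependencyFailure, [] (AsyncLoader &, const LoadJobPtr &) {});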
// Represents a logically connected set of LoadJobs required to achieve some goals (final LoadJob in the set).
class LoadTask : private boost::noncopyable
@ -277,7 +306,7 @@ private:
{
size_t dependencies_left = 0; // Current number of dependencies on pending jobs.
UInt64 ready_seqno = 0; // Zero means that job is not in ready queue.
LoadJobSet dependent_jobs; // Set of jobs dependent on this job.
LoadJobSet dependent_jobs; // Set of jobs dependent on this job. Contains only scheduled jobs.
// Three independent states of a scheduled job.
bool isBlocked() const { return dependencies_left > 0; }
@ -371,7 +400,7 @@ public:
private:
void checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
String checkCycle(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock);
void finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock);
void finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr reason, std::unique_lock<std::mutex> & lock);
void gatherNotScheduled(const LoadJobPtr & job, LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
void prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock);
void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock);

View File

@ -1,10 +1,11 @@
#pragma once
#include <list>
#include <memory>
#include <mutex>
#include <optional>
#include <base/types.h>
#include <boost/core/noncopyable.hpp>
#include <mutex>
#include <memory>
#include <list>
namespace DB

View File

@ -242,7 +242,7 @@
M(FilesystemCacheDelayedCleanupElements, "Filesystem cache elements in background cleanup queue") \
M(FilesystemCacheHoldFileSegments, "Filesystem cache file segment which are currently hold as unreleasable") \
M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \
M(S3Requests, "S3 requests") \
M(S3Requests, "S3 requests count") \
M(KeeperAliveConnections, "Number of alive connections") \
M(KeeperOutstandingRequets, "Number of outstanding requests") \
M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \

View File

@ -0,0 +1,494 @@
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
# pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
# pragma clang diagnostic ignored "-Wnested-anon-types"
# pragma clang diagnostic ignored "-Wunused-parameter"
# pragma clang diagnostic ignored "-Wshadow-field-in-constructor"
# pragma clang diagnostic ignored "-Wdtor-name"
#endif
#include <re2/re2.h>
#include <re2/regexp.h>
#include <re2/walker-inl.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
#ifdef LOG_INFO
#undef LOG_INFO
#undef LOG_WARNING
#undef LOG_ERROR
#undef LOG_FATAL
#endif
#include "MatchGenerator.h"
#include <Common/Exception.h>
#include <Common/thread_local_rng.h>
#include <map>
#include <functional>
#include <magic_enum.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
}
namespace re2
{
class RandomStringPrepareWalker : public Regexp::Walker<Regexp *>
{
private:
static constexpr int ImplicitMax = 100;
using Children = std::vector<Regexp *>;
class Generators;
/// These function objects look much prettier than lambda expressions when stack traces are printed
class NodeFunction
{
public:
virtual size_t operator() (char * out, size_t size) = 0;
virtual size_t getRequiredSize() = 0;
virtual ~NodeFunction() = default;
};
using NodeFunctionPtr = std::shared_ptr<NodeFunction>;
using NodeFuncs = std::vector<NodeFunctionPtr>;
static NodeFuncs getFuncs(const Children & children_, const Generators & generators_)
{
NodeFuncs result;
result.reserve(children_.size());
for (auto * child: children_)
{
result.push_back(generators_.at(child));
}
return result;
}
class Generators: public std::map<re2::Regexp *, NodeFunctionPtr> {};
class RegexpConcatFunction : public NodeFunction
{
public:
RegexpConcatFunction(const Children & children_, const Generators & generators_)
: children(getFuncs(children_, generators_))
{
}
size_t operator () (char * out, size_t size) override
{
size_t total_size = 0;
for (auto & child: children)
{
size_t consumed = child->operator()(out, size);
chassert(consumed <= size);
out += consumed;
size -= consumed;
total_size += consumed;
}
return total_size;
}
size_t getRequiredSize() override
{
size_t total_size = 0;
for (auto & child: children)
total_size += child->getRequiredSize();
return total_size;
}
private:
NodeFuncs children;
};
class RegexpAlternateFunction : public NodeFunction
{
public:
RegexpAlternateFunction(const Children & children_, const Generators & generators_)
: children(getFuncs(children_, generators_))
{
}
size_t operator () (char * out, size_t size) override
{
std::uniform_int_distribution<int> distribution(0, static_cast<int>(children.size()-1));
int chosen = distribution(thread_local_rng);
size_t consumed = children[chosen]->operator()(out, size);
chassert(consumed <= size);
return consumed;
}
size_t getRequiredSize() override
{
size_t total_size = 0;
for (auto & child: children)
total_size = std::max(total_size, child->getRequiredSize());
return total_size;
}
private:
NodeFuncs children;
};
class RegexpRepeatFunction : public NodeFunction
{
public:
RegexpRepeatFunction(Regexp * re_, const Generators & generators_, int min_repeat_, int max_repeat_)
: func(generators_.at(re_))
, min_repeat(min_repeat_)
, max_repeat(max_repeat_)
{
}
size_t operator () (char * out, size_t size) override
{
std::uniform_int_distribution<int> distribution(min_repeat, max_repeat);
int ntimes = distribution(thread_local_rng);
size_t total_size = 0;
for (int i = 0; i < ntimes; ++i)
{
size_t consumed = func->operator()(out, size);
chassert(consumed <= size);
out += consumed;
size -= consumed;
total_size += consumed;
}
return total_size;
}
size_t getRequiredSize() override
{
return max_repeat * func->getRequiredSize();
}
private:
NodeFunctionPtr func;
int min_repeat = 0;
int max_repeat = 0;
};
class RegexpCharClassFunction : public NodeFunction
{
using CharRanges = std::vector<std::pair<re2::Rune, re2::Rune>>;
public:
explicit RegexpCharClassFunction(Regexp * re_)
{
CharClass * cc = re_->cc();
chassert(cc);
if (cc->empty())
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "kRegexpCharClass is empty");
char_count = cc->size();
char_ranges.reserve(std::distance(cc->begin(), cc->end()));
for (const auto range: *cc)
{
char_ranges.emplace_back(range.lo, range.hi);
}
}
size_t operator () (char * out, size_t size) override
{
chassert(UTFmax <= size);
std::uniform_int_distribution<int> distribution(1, char_count);
int chosen = distribution(thread_local_rng);
int count_down = chosen;
auto it = char_ranges.begin();
for (; it != char_ranges.end(); ++it)
{
auto [lo, hi] = *it;
auto range_len = hi - lo + 1;
if (count_down <= range_len)
break;
count_down -= range_len;
}
if (it == char_ranges.end())
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR,
"Unable to choose the rune. Runes {}, ranges {}, chosen {}",
char_count, char_ranges.size(), chosen);
auto [lo, _] = *it;
Rune r = lo + count_down - 1;
return re2::runetochar(out, &r);
}
size_t getRequiredSize() override
{
return UTFmax;
}
private:
int char_count = 0;
CharRanges char_ranges;
};
class RegexpLiteralStringFunction : public NodeFunction
{
public:
explicit RegexpLiteralStringFunction(Regexp * re_)
{
if (re_->nrunes() == 0)
return;
char buffer[UTFmax];
for (int i = 0; i < re_->nrunes(); ++i)
{
int n = re2::runetochar(buffer, &re_->runes()[i]);
literal_string += String(buffer, n);
}
}
size_t operator () (char * out, size_t size) override
{
chassert(literal_string.size() <= size);
memcpy(out, literal_string.data(), literal_string.size());
return literal_string.size();
}
size_t getRequiredSize() override
{
return literal_string.size();
}
private:
String literal_string;
};
class RegexpLiteralFunction : public NodeFunction
{
public:
explicit RegexpLiteralFunction(Regexp * re_)
{
char buffer[UTFmax];
Rune r = re_->rune();
int n = re2::runetochar(buffer, &r);
literal = String(buffer, n);
}
size_t operator () (char * out, size_t size) override
{
chassert(literal.size() <= size);
memcpy(out, literal.data(), literal.size());
return literal.size();
}
size_t getRequiredSize() override
{
return literal.size();
}
private:
String literal;
};
class ThrowExceptionFunction : public NodeFunction
{
public:
explicit ThrowExceptionFunction(Regexp * re_)
: operation(magic_enum::enum_name(re_->op()))
{
}
size_t operator () (char *, size_t) override
{
throw DB::Exception(
DB::ErrorCodes::BAD_ARGUMENTS,
"RandomStringPrepareWalker: regexp node '{}' is not supported for generating a random match",
operation);
}
size_t getRequiredSize() override
{
return 0;
}
private:
String operation;
};
public:
std::function<String()> getGenerator()
{
if (root == nullptr)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "no root has been set");
if (generators.empty())
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "no generators");
auto root_func = generators.at(root);
auto required_buffer_size = root_func->getRequiredSize();
auto generator_func = [=] ()
-> String
{
auto buffer = String(required_buffer_size, '\0');
size_t size = root_func->operator()(buffer.data(), buffer.size());
buffer.resize(size);
return buffer;
};
root = nullptr;
generators = {};
return std::move(generator_func);
}
private:
Children CopyChildrenArgs(Regexp ** children, int nchild)
{
Children result;
result.reserve(nchild);
for (int i = 0; i < nchild; ++i)
result.push_back(Copy(children[i]));
return result;
}
Regexp * ShortVisit(Regexp* /*re*/, Regexp * /*parent_arg*/) override
{
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "ShortVisit should not be called");
}
Regexp * PreVisit(Regexp * re, Regexp * parent_arg, bool* /*stop*/) override /*noexcept*/
{
if (parent_arg == nullptr)
{
chassert(root == nullptr);
chassert(re != nullptr);
root = re;
}
return re;
}
Regexp * PostVisit(Regexp * re, Regexp * /*parent_arg*/, Regexp * pre_arg,
Regexp ** child_args, int nchild_args) override /*noexcept*/
{
switch (re->op())
{
case kRegexpConcat: // Matches concatenation of sub_[0..nsub-1].
generators[re] = std::make_shared<RegexpConcatFunction>(CopyChildrenArgs(child_args, nchild_args), generators);
break;
case kRegexpAlternate: // Matches union of sub_[0..nsub-1].
generators[re] = std::make_shared<RegexpAlternateFunction>(CopyChildrenArgs(child_args, nchild_args), generators);
break;
case kRegexpQuest: // Matches sub_[0] zero or one times.
chassert(nchild_args == 1);
generators[re] = std::make_shared<RegexpRepeatFunction>(child_args[0], generators, 0, 1);
break;
case kRegexpStar: // Matches sub_[0] zero or more times.
chassert(nchild_args == 1);
generators[re] = std::make_shared<RegexpRepeatFunction>(child_args[0], generators, 0, ImplicitMax);
break;
case kRegexpPlus: // Matches sub_[0] one or more times.
chassert(nchild_args == 1);
generators[re] = std::make_shared<RegexpRepeatFunction>(child_args[0], generators, 1, ImplicitMax);
break;
case kRegexpCharClass: // Matches character class given by cc_.
chassert(nchild_args == 0);
generators[re] = std::make_shared<RegexpCharClassFunction>(re);
break;
case kRegexpLiteralString: // Matches runes_.
chassert(nchild_args == 0);
generators[re] = std::make_shared<RegexpLiteralStringFunction>(re);
break;
case kRegexpLiteral: // Matches rune_.
chassert(nchild_args == 0);
generators[re] = std::make_shared<RegexpLiteralFunction>(re);
break;
case kRegexpCapture: // Parenthesized (capturing) subexpression.
chassert(nchild_args == 1);
generators[re] = generators.at(child_args[0]);
break;
case kRegexpNoMatch: // Matches no strings.
case kRegexpEmptyMatch: // Matches empty string.
case kRegexpRepeat: // Matches sub_[0] at least min_ times, at most max_ times.
case kRegexpAnyChar: // Matches any character.
case kRegexpAnyByte: // Matches any byte [sic].
case kRegexpBeginLine: // Matches empty string at beginning of line.
case kRegexpEndLine: // Matches empty string at end of line.
case kRegexpWordBoundary: // Matches word boundary "\b".
case kRegexpNoWordBoundary: // Matches not-a-word boundary "\B".
case kRegexpBeginText: // Matches empty string at beginning of text.
case kRegexpEndText: // Matches empty string at end of text.
case kRegexpHaveMatch: // Forces match of entire expression
generators[re] = std::make_shared<ThrowExceptionFunction>(re);
}
return pre_arg;
}
Regexp * root = nullptr;
Generators generators;
};
}
namespace DB
{
void RandomStringGeneratorByRegexp::RegexpPtrDeleter::operator() (re2::Regexp * re) const noexcept
{
re->Decref();
}
RandomStringGeneratorByRegexp::RandomStringGeneratorByRegexp(const String & re_str)
{
re2::RE2::Options options;
options.set_case_sensitive(true);
options.set_encoding(re2::RE2::Options::EncodingLatin1);
auto flags = static_cast<re2::Regexp::ParseFlags>(options.ParseFlags());
re2::RegexpStatus status;
regexp.reset(re2::Regexp::Parse(re_str, flags, &status));
if (!regexp)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
"Error parsing regexp '{}': {}",
re_str, status.Text());
regexp.reset(regexp->Simplify());
auto walker = re2::RandomStringPrepareWalker();
walker.Walk(regexp.get(), {});
generatorFunc = walker.getGenerator();
{
auto test_check = generate();
auto matched = RE2::FullMatch(test_check, re2::RE2(re_str));
if (!matched)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
"Generator is unable to produce random string for regexp '{}': {}",
re_str, test_check);
}
}
String RandomStringGeneratorByRegexp::generate() const
{
chassert(generatorFunc);
return generatorFunc();
}
}

View File

@ -0,0 +1,31 @@
#pragma once
#include <base/types.h>
#include <memory>
namespace re2
{
class Regexp;
}
namespace DB
{
class RandomStringGeneratorByRegexp
{
public:
explicit RandomStringGeneratorByRegexp(const String & re_str);
String generate() const;
private:
struct RegexpPtrDeleter
{
void operator()(re2::Regexp * re) const noexcept;
};
using RegexpPtr = std::unique_ptr<re2::Regexp, RegexpPtrDeleter>;
RegexpPtr regexp;
std::function<String()> generatorFunc;
};
}
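
For orientation, a minimal usage sketch of the generator declared above (the include path matches the unit tests further down; the printed key is illustrative):

``` cpp
#include <Common/MatchGenerator.h>
#include <iostream>

int main()
{
    // Throws DB::Exception for unsupported patterns (e.g. anchors such as "^abc").
    DB::RandomStringGeneratorByRegexp gen("prefix-[a-z]{3}-suffix/[0-9a-f]{20}");
    std::cout << gen.generate() << '\n'; // e.g. prefix-qzt-suffix/3fa85f6457174562b3fc
}
```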

View File

@ -65,4 +65,5 @@ ObjectStorageKey ObjectStorageKey::createAsAbsolute(String key_)
object_key.is_relative = false;
return object_key;
}
}

View File

@ -0,0 +1,94 @@
#include "ObjectStorageKeyGenerator.h"
#include <Common/getRandomASCIIString.h>
#include <Common/MatchGenerator.h>
#include <fmt/format.h>
class GeneratorWithTemplate : public DB::IObjectStorageKeysGenerator
{
public:
explicit GeneratorWithTemplate(String key_template_)
: key_template(std::move(key_template_))
, re_gen(key_template)
{
}
DB::ObjectStorageKey generate(const String &) const override
{
return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate());
}
private:
String key_template;
DB::RandomStringGeneratorByRegexp re_gen;
};
class GeneratorWithPrefix : public DB::IObjectStorageKeysGenerator
{
public:
explicit GeneratorWithPrefix(String key_prefix_)
: key_prefix(std::move(key_prefix_))
{}
DB::ObjectStorageKey generate(const String &) const override
{
/// Path to store the new S3 object.
/// Total length is 32 a-z characters for enough randomness.
/// First 3 characters are used as a prefix for
/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/
constexpr size_t key_name_total_size = 32;
constexpr size_t key_name_prefix_size = 3;
/// Path to store new S3 object.
String key = fmt::format("{}/{}",
DB::getRandomASCIIString(key_name_prefix_size),
DB::getRandomASCIIString(key_name_total_size - key_name_prefix_size));
/// whatever the key_prefix value is, treat the key as relative
return DB::ObjectStorageKey::createAsRelative(key_prefix, key);
}
private:
String key_prefix;
};
class GeneratorAsIsWithPrefix : public DB::IObjectStorageKeysGenerator
{
public:
explicit GeneratorAsIsWithPrefix(String key_prefix_)
: key_prefix(std::move(key_prefix_))
{}
DB::ObjectStorageKey generate(const String & path) const override
{
return DB::ObjectStorageKey::createAsRelative(key_prefix, path);
}
private:
String key_prefix;
};
namespace DB
{
ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorAsIsWithPrefix(String key_prefix)
{
return std::make_shared<GeneratorAsIsWithPrefix>(std::move(key_prefix));
}
ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByPrefix(String key_prefix)
{
return std::make_shared<GeneratorWithPrefix>(std::move(key_prefix));
}
ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByTemplate(String key_template)
{
return std::make_shared<GeneratorWithTemplate>(std::move(key_template));
}
}

View File

@ -0,0 +1,22 @@
#pragma once
#include "ObjectStorageKey.h"
#include <memory>
namespace DB
{
class IObjectStorageKeysGenerator
{
public:
virtual ObjectStorageKey generate(const String & path) const = 0;
virtual ~IObjectStorageKeysGenerator() = default;
};
using ObjectStorageKeysGeneratorPtr = std::shared_ptr<IObjectStorageKeysGenerator>;
ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorAsIsWithPrefix(String key_prefix);
ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByPrefix(String key_prefix);
ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByTemplate(String key_template);
}
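
A short sketch of how the three factories above differ, reusing only names declared in this header (outputs are illustrative):

``` cpp
#include <Common/ObjectStorageKeyGenerator.h>
#include <iostream>

int main()
{
    // Random key under a prefix: "<prefix><3 random a-z chars>/<29 random a-z chars>".
    auto by_prefix = DB::createObjectStorageKeysGeneratorByPrefix("cluster/mergetree/");
    // Key shaped by a regexp template, backed by RandomStringGeneratorByRegexp.
    auto by_template = DB::createObjectStorageKeysGeneratorByTemplate("cluster/mergetree/[a-z]{3}/[a-z]{29}");
    // Pass-through: the given path is appended to the prefix unchanged (used for s3_plain disks).
    auto as_is = DB::createObjectStorageKeysGeneratorAsIsWithPrefix("cluster/mergetree/");

    std::cout << by_prefix->generate("some/path").serialize() << '\n';
    std::cout << by_template->generate("some/path").serialize() << '\n';
    std::cout << as_is->generate("some/path").serialize() << '\n'; // cluster/mergetree/some/path
}
```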

View File

@ -391,6 +391,9 @@ The server successfully detected this situation and will download merged part fr
M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \
M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \
\
M(S3Clients, "Number of created S3 clients.") \
M(TinyS3Clients, "Number of S3 client copies that reuse an existing auth provider from another client.") \
\
M(EngineFileLikeReadFiles, "Number of files read in table engines working with files (like File/S3/URL/HDFS).") \
\
M(ReadBufferFromS3Microseconds, "Time spent on reading from S3.") \

View File

@ -296,6 +296,9 @@ constexpr std::pair<std::string_view, std::string_view> replacements[]
// Replace parts from @c replacements with shorter aliases
String demangleAndCollapseNames(std::string_view file, const char * const symbol_name)
{
if (!symbol_name)
return "?";
std::string_view file_copy = file;
if (auto trim_pos = file.find_last_of('/'); trim_pos != file.npos)
file_copy.remove_suffix(file.size() - trim_pos);

View File

@ -28,6 +28,7 @@
#cmakedefine01 USE_S2_GEOMETRY
#cmakedefine01 USE_FASTOPS
#cmakedefine01 USE_SQIDS
#cmakedefine01 USE_IDNA
#cmakedefine01 USE_NLP
#cmakedefine01 USE_VECTORSCAN
#cmakedefine01 USE_LIBURING

View File

@ -2,6 +2,8 @@
#include <gtest/gtest.h>
#include <array>
#include <atomic>
#include <exception>
#include <list>
#include <barrier>
#include <chrono>
@ -544,6 +546,99 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies)
}
}
TEST(AsyncLoader, IgnoreDependencyFailure)
{
AsyncLoaderTest t;
std::atomic<bool> success{false};
t.loader.start();
std::string_view error_message = "test job failure";
auto failed_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message);
};
auto dependent_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
success.store(true);
};
auto failed_job = makeLoadJob({}, "failed_job", failed_job_func);
auto dependent_job = makeLoadJob({failed_job},
"dependent_job", ignoreDependencyFailure, dependent_job_func);
auto task = t.schedule({ failed_job, dependent_job });
t.loader.wait();
ASSERT_EQ(failed_job->status(), LoadStatus::FAILED);
ASSERT_EQ(dependent_job->status(), LoadStatus::OK);
ASSERT_EQ(success.load(), true);
}
TEST(AsyncLoader, CustomDependencyFailure)
{
AsyncLoaderTest t(16);
int error_count = 0;
std::atomic<size_t> good_count{0};
std::barrier canceled_sync(4);
t.loader.start();
std::string_view error_message = "test job failure";
auto evil_dep_func = [&] (AsyncLoader &, const LoadJobPtr &) {
throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message);
};
auto good_dep_func = [&] (AsyncLoader &, const LoadJobPtr &) {
good_count++;
};
auto late_dep_func = [&] (AsyncLoader &, const LoadJobPtr &) {
canceled_sync.arrive_and_wait(); // wait for the failure (A) before this job finishes
};
auto collect_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
FAIL(); // job should be canceled, so we never get here
};
auto dependent_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
FAIL(); // job should be canceled, so we never get here
};
auto fail_after_two = [&] (const LoadJobPtr & self, const LoadJobPtr &, std::exception_ptr & cancel) {
if (++error_count == 2)
cancel = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
"Load job '{}' canceled: too many dependencies have failed",
self->name));
};
auto evil_dep1 = makeLoadJob({}, "evil_dep1", evil_dep_func);
auto evil_dep2 = makeLoadJob({}, "evil_dep2", evil_dep_func);
auto evil_dep3 = makeLoadJob({}, "evil_dep3", evil_dep_func);
auto good_dep1 = makeLoadJob({}, "good_dep1", good_dep_func);
auto good_dep2 = makeLoadJob({}, "good_dep2", good_dep_func);
auto good_dep3 = makeLoadJob({}, "good_dep3", good_dep_func);
auto late_dep1 = makeLoadJob({}, "late_dep1", late_dep_func);
auto late_dep2 = makeLoadJob({}, "late_dep2", late_dep_func);
auto late_dep3 = makeLoadJob({}, "late_dep3", late_dep_func);
auto collect_job = makeLoadJob({
evil_dep1, evil_dep2, evil_dep3,
good_dep1, good_dep2, good_dep3,
late_dep1, late_dep2, late_dep3
}, "collect_job", fail_after_two, collect_job_func);
auto dependent_job1 = makeLoadJob({ collect_job }, "dependent_job1", dependent_job_func);
auto dependent_job2 = makeLoadJob({ collect_job }, "dependent_job2", dependent_job_func);
auto dependent_job3 = makeLoadJob({ collect_job }, "dependent_job3", dependent_job_func);
auto task = t.schedule({ dependent_job1, dependent_job2, dependent_job3 }); // Other jobs should be discovered automatically
t.loader.wait(collect_job, true);
canceled_sync.arrive_and_wait(); // (A)
t.loader.wait();
ASSERT_EQ(late_dep1->status(), LoadStatus::OK);
ASSERT_EQ(late_dep2->status(), LoadStatus::OK);
ASSERT_EQ(late_dep3->status(), LoadStatus::OK);
ASSERT_EQ(collect_job->status(), LoadStatus::CANCELED);
ASSERT_EQ(dependent_job1->status(), LoadStatus::CANCELED);
ASSERT_EQ(dependent_job2->status(), LoadStatus::CANCELED);
ASSERT_EQ(dependent_job3->status(), LoadStatus::CANCELED);
ASSERT_EQ(good_count.load(), 3);
}
TEST(AsyncLoader, TestConcurrency)
{
AsyncLoaderTest t(10);

View File

@ -0,0 +1,101 @@
#include <Common/MatchGenerator.h>
#include <Common/ObjectStorageKeyGenerator.h>
#include <Common/Stopwatch.h>
#include <Common/Exception.h>
#include <gtest/gtest.h>
void routine(String s)
{
std::cerr << "case '"<< s << "'";
auto gen = DB::RandomStringGeneratorByRegexp(s);
[[maybe_unused]] auto res = gen.generate();
std::cerr << " result '"<< res << "'" << std::endl;
}
TEST(GenerateRandomString, Positive)
{
routine(".");
routine("[[:xdigit:]]");
routine("[0-9a-f]");
routine("[a-z]");
routine("prefix-[0-9a-f]-suffix");
routine("prefix-[a-z]-suffix");
routine("[0-9a-f]{3}");
routine("prefix-[0-9a-f]{3}-suffix");
routine("prefix-[a-z]{3}-suffix/[0-9a-f]{20}");
routine("left|right");
routine("[a-z]{0,3}");
routine("just constant string");
routine("[a-z]?");
routine("[a-z]*");
routine("[a-z]+");
routine("[^a-z]");
routine("[[:lower:]]{3}/suffix");
routine("prefix-(A|B|[0-9a-f]){3}");
routine("mergetree/[a-z]{3}/[a-z]{29}");
}
TEST(GenerateRandomString, Negative)
{
EXPECT_THROW(routine("[[:do_not_exists:]]"), DB::Exception);
EXPECT_THROW(routine("[:do_not_exis..."), DB::Exception);
EXPECT_THROW(routine("^abc"), DB::Exception);
}
TEST(GenerateRandomString, DifferentResult)
{
std::cerr << "100 different keys" << std::endl;
auto gen = DB::RandomStringGeneratorByRegexp("prefix-[a-z]{3}-suffix/[0-9a-f]{20}");
std::set<String> deduplicate;
for (int i = 0; i < 100; ++i)
ASSERT_TRUE(deduplicate.insert(gen.generate()).second);
std::cerr << "100 different keys: ok" << std::endl;
}
TEST(GenerateRandomString, FullRange)
{
std::cerr << "all possible letters" << std::endl;
auto gen = DB::RandomStringGeneratorByRegexp("[a-z]");
std::set<String> deduplicate;
const size_t count = 'z' - 'a' + 1;
while (deduplicate.size() < count)
if (deduplicate.insert(gen.generate()).second)
std::cerr << " +1 ";
std::cerr << "all possible letters, ok" << std::endl;
}
UInt64 elapsed(DB::ObjectStorageKeysGeneratorPtr generator)
{
String path = "some_path";
Stopwatch watch;
for (int i = 0; i < 100000; ++i)
{
[[ maybe_unused ]] auto result = generator->generate(path).serialize();
}
return watch.elapsedMicroseconds();
}
TEST(ObjectStorageKey, Performance)
{
auto elapsed_old = elapsed(DB::createObjectStorageKeysGeneratorByPrefix(
"xx-xx-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/mergetree/"));
std::cerr << "old: " << elapsed_old << std::endl;
auto elapsed_new = elapsed(DB::createObjectStorageKeysGeneratorByTemplate(
"xx-xx-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/mergetree/[a-z]{3}/[a-z]{29}"));
std::cerr << "new: " << elapsed_new << std::endl;
if (elapsed_new > elapsed_old)
{
std::cerr << "slow ratio: +" << float(elapsed_new) / elapsed_old << std::endl;
ASSERT_LT(elapsed_new, 1.2 * elapsed_old);
}
else
std::cerr << "fast ratio: " << float(elapsed_old) / elapsed_new << std::endl;
}

View File

@ -4,6 +4,7 @@
#include "config.h"
#include <chrono>
#include <mutex>
#include <string>
#include <Coordination/KeeperStateMachine.h>
#include <Coordination/KeeperStateManager.h>
@ -14,6 +15,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <boost/algorithm/string.hpp>
#include <libnuraft/callback.hxx>
#include <libnuraft/cluster_config.hxx>
#include <libnuraft/log_val_type.hxx>
#include <libnuraft/msg_type.hxx>
@ -196,13 +198,9 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
nuraft::raft_server::commit_in_bg();
}
void commitLogs(uint64_t index_to_commit, bool initial_commit_exec)
std::unique_lock<std::recursive_mutex> lockRaft()
{
leader_commit_index_.store(index_to_commit);
quick_commit_index_ = index_to_commit;
lagging_sm_target_index_ = index_to_commit;
commit_in_bg_exec(0, initial_commit_exec);
return std::unique_lock(lock_);
}
using nuraft::raft_server::raft_server;
@ -518,6 +516,7 @@ void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession &
RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
{
std::vector<nuraft::ptr<nuraft::buffer>> entries;
entries.reserve(requests_for_sessions.size());
for (const auto & request_for_session : requests_for_sessions)
entries.push_back(getZooKeeperLogEntry(request_for_session));
@ -630,11 +629,20 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
{
const auto preprocess_logs = [&]
{
auto lock = raft_instance->lockRaft();
if (keeper_context->local_logs_preprocessed)
return;
keeper_context->local_logs_preprocessed = true;
auto log_store = state_manager->load_log_store();
if (last_log_idx_on_disk > 0 && last_log_idx_on_disk > state_machine->last_commit_index())
auto log_entries = log_store->log_entries(state_machine->last_commit_index() + 1, log_store->next_slot());
if (log_entries->empty())
{
auto log_entries = log_store->log_entries(state_machine->last_commit_index() + 1, last_log_idx_on_disk + 1);
LOG_INFO(log, "All local log entries preprocessed");
return;
}
size_t preprocessed = 0;
LOG_INFO(log, "Preprocessing {} log entries", log_entries->size());
@ -651,11 +659,6 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
LOG_TRACE(log, "Preprocessed {}/{} entries", preprocessed, log_entries->size());
}
LOG_INFO(log, "Preprocessing done");
}
else
{
LOG_INFO(log, "All local log entries preprocessed");
}
};
switch (type)
@ -666,43 +669,34 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
/// until we preprocess all stored logs
return nuraft::cb_func::ReturnCode::ReturnNull;
}
case nuraft::cb_func::InitialBatchCommited:
{
preprocess_logs();
break;
}
case nuraft::cb_func::GotAppendEntryReqFromLeader:
{
auto & req = *static_cast<nuraft::req_msg *>(param->ctx);
if (req.get_commit_idx() == 0 || req.log_entries().empty())
break;
auto last_committed_index = state_machine->last_commit_index();
// Actual log number.
auto index_to_commit = std::min({last_log_idx_on_disk, req.get_last_log_idx(), req.get_commit_idx()});
if (index_to_commit > last_committed_index)
{
LOG_TRACE(log, "Trying to commit local log entries, committing upto {}", index_to_commit);
raft_instance->commitLogs(index_to_commit, true);
/// after we manually committed all the local logs we can, we assert that all of the local logs are either
/// committed or preprocessed
if (!keeper_context->local_logs_preprocessed)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Local logs are not preprocessed");
}
else if (last_log_idx_on_disk <= last_committed_index)
{
keeper_context->local_logs_preprocessed = true;
}
else if
(
index_to_commit == 0 ||
(index_to_commit == last_committed_index && last_log_idx_on_disk > index_to_commit) /// we need to rollback all the logs so we preprocess all of them
)
/// maybe a snapshot was installed
if (state_machine->last_commit_index() >= last_log_idx_on_disk)
{
preprocess_logs();
break;
}
auto & req = *static_cast<nuraft::req_msg *>(param->ctx);
if (req.log_entries().empty())
break;
if (req.get_last_log_idx() < last_log_idx_on_disk)
last_log_idx_on_disk = req.get_last_log_idx();
/// we don't want to accept too many new logs before we preprocess all the local logs
/// because the next log index is decreased on each failure, we also need to accept requests when it's near last_log_idx_on_disk
/// so the counter is reset on the leader side
else if (raft_instance->get_target_committed_log_idx() >= last_log_idx_on_disk && req.get_last_log_idx() > last_log_idx_on_disk)
return nuraft::cb_func::ReturnNull;
break;
}
case nuraft::cb_func::StateMachineExecution:
{
if (state_machine->last_commit_index() >= last_log_idx_on_disk)
preprocess_logs();
break;
}
default:

View File

@ -70,7 +70,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
{
std::lock_guard client_lock{snapshot_s3_client_mutex};
// if client is not changed (same auth settings, same endpoint) we don't need to update
if (snapshot_s3_client && snapshot_s3_client->client && auth_settings == snapshot_s3_client->auth_settings
if (snapshot_s3_client && snapshot_s3_client->client && !snapshot_s3_client->auth_settings.hasUpdates(auth_settings)
&& snapshot_s3_client->uri.uri == new_uri.uri)
return;
}

View File

@ -13,6 +13,7 @@ private:
static inline const std::unordered_map<LogsLevel, Poco::Message::Priority> LEVELS =
{
{LogsLevel::test, Poco::Message::Priority::PRIO_TEST},
{LogsLevel::trace, Poco::Message::Priority::PRIO_TRACE},
{LogsLevel::debug, Poco::Message::Priority::PRIO_DEBUG},
{LogsLevel::information, Poco::Message::PRIO_INFORMATION},

View File

@ -4,6 +4,8 @@
#include <fmt/core.h>
#include <libnuraft/srv_config.hxx>
#include <optional>
namespace DB
{
// default- and copy-constructible version of nuraft::srv_config

View File

@ -113,6 +113,8 @@ namespace DB
M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp
DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
struct ServerSettings : public BaseSettings<ServerSettingsTraits>

View File

@ -204,6 +204,8 @@ class IColumn;
M(Bool, input_format_parallel_parsing, true, "Enable parallel parsing for some data formats.", 0) \
M(UInt64, min_chunk_bytes_for_parallel_parsing, (10 * 1024 * 1024), "The minimum chunk size in bytes, which each thread will parse in parallel.", 0) \
M(Bool, output_format_parallel_formatting, true, "Enable parallel formatting for some data formats.", 0) \
M(UInt64, output_format_compression_level, 3, "Default compression level if query output is compressed. The setting is applied when `SELECT` query has `INTO OUTFILE` or when inserting to table function `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`.", 0) \
M(UInt64, output_format_compression_zstd_window_log, 0, "Can be used when the output compression method is `zstd`. If greater than `0`, this setting explicitly sets compression window size (power of `2`) and enables a long-range mode for zstd compression.", 0) \
\
M(UInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized.", 0) \
M(UInt64, merge_tree_min_bytes_for_concurrent_read, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized.", 0) \

View File

@ -227,11 +227,17 @@ LoadTaskPtr DatabaseOrdinary::startupDatabaseAsync(
LoadJobSet startup_after,
LoadingStrictnessLevel /*mode*/)
{
// NOTE: this task is empty, but it is required for correct dependency handling (startup should be done after tables loading)
auto job = makeLoadJob(
std::move(startup_after),
TablesLoaderBackgroundStartupPoolId,
fmt::format("startup Ordinary database {}", getDatabaseName()));
fmt::format("startup Ordinary database {}", getDatabaseName()),
ignoreDependencyFailure,
[] (AsyncLoader &, const LoadJobPtr &)
{
// NOTE: this job is no-op, but it is required for correct dependency handling
// 1) startup should be done after tables loading
// 2) load or startup errors for tables should not lead to not starting up the whole database
});
return startup_database_task = makeLoadTask(async_loader, {job});
}

View File

@ -258,12 +258,6 @@ String DiskObjectStorage::getUniqueId(const String & path) const
bool DiskObjectStorage::checkUniqueId(const String & id) const
{
if (!id.starts_with(object_key_prefix))
{
LOG_DEBUG(log, "Blob with id {} doesn't start with blob storage prefix {}, Stack {}", id, object_key_prefix, StackTrace().toString());
return false;
}
auto object = StoredObject(id);
return object_storage->exists(object);
}

View File

@ -20,6 +20,7 @@ namespace ErrorCodes
void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf)
{
readIntText(version, buf);
assertChar('\n', buf);
if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_FULL_OBJECT_KEY)
throw Exception(
@ -27,8 +28,6 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf)
"Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}",
metadata_file_path, toString(version), toString(VERSION_FULL_OBJECT_KEY));
assertChar('\n', buf);
UInt32 keys_count;
readIntText(keys_count, buf);
assertChar('\t', buf);
@ -122,6 +121,7 @@ void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const
chassert(write_version >= VERSION_ABSOLUTE_PATHS && write_version <= VERSION_FULL_OBJECT_KEY);
writeIntText(write_version, buf);
writeChar('\n', buf);
writeIntText(keys_with_meta.size(), buf);

View File

@ -19,7 +19,6 @@
#include <Disks/ObjectStorages/S3/diskSettings.h>
#include <Common/getRandomASCIIString.h>
#include <Common/ProfileEvents.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/logger_useful.h>
@ -556,27 +555,12 @@ std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage(
return std::make_unique<S3ObjectStorage>(
std::move(new_client), std::move(new_s3_settings),
version_id, s3_capabilities, new_namespace,
endpoint, object_key_prefix, disk_name);
endpoint, key_generator, disk_name);
}
ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string &) const
ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & path) const
{
/// Path to store the new S3 object.
/// Total length is 32 a-z characters for enough randomness.
/// First 3 characters are used as a prefix for
/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/
constexpr size_t key_name_total_size = 32;
constexpr size_t key_name_prefix_size = 3;
/// Path to store new S3 object.
String key = fmt::format("{}/{}",
getRandomASCIIString(key_name_prefix_size),
getRandomASCIIString(key_name_total_size - key_name_prefix_size));
/// what ever key_prefix value is, consider that key as relative
return ObjectStorageKey::createAsRelative(object_key_prefix, key);
return key_generator->generate(path);
}

View File

@ -9,6 +9,7 @@
#include <memory>
#include <Storages/StorageS3Settings.h>
#include <Common/MultiVersion.h>
#include <Common/ObjectStorageKeyGenerator.h>
namespace DB
@ -39,7 +40,6 @@ struct S3ObjectStorageSettings
bool read_only;
};
class S3ObjectStorage : public IObjectStorage
{
private:
@ -53,10 +53,10 @@ private:
const S3Capabilities & s3_capabilities_,
String bucket_,
String connection_string,
String object_key_prefix_,
ObjectStorageKeysGeneratorPtr key_generator_,
const String & disk_name_)
: bucket(std::move(bucket_))
, object_key_prefix(std::move(object_key_prefix_))
, key_generator(std::move(key_generator_))
, disk_name(disk_name_)
, client(std::move(client_))
, s3_settings(std::move(s3_settings_))
@ -179,7 +179,7 @@ private:
private:
std::string bucket;
String object_key_prefix;
ObjectStorageKeysGeneratorPtr key_generator;
std::string disk_name;
MultiVersion<S3::Client> client;
@ -199,11 +199,6 @@ private:
class S3PlainObjectStorage : public S3ObjectStorage
{
public:
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override
{
return ObjectStorageKey::createAsRelative(object_key_prefix, path);
}
std::string getName() const override { return "S3PlainObjectStorage"; }
template <class ...Args>

View File

@ -91,6 +91,60 @@ private:
}
};
std::pair<String, ObjectStorageKeysGeneratorPtr> getPrefixAndKeyGenerator(
String type, const S3::URI & uri, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
if (type == "s3_plain")
return {uri.key, createObjectStorageKeysGeneratorAsIsWithPrefix(uri.key)};
chassert(type == "s3");
bool storage_metadata_write_full_object_key = DiskObjectStorageMetadata::getWriteFullObjectKeySetting();
bool send_metadata = config.getBool(config_prefix + ".send_metadata", false);
if (send_metadata && storage_metadata_write_full_object_key)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Wrong configuration in {}. "
"s3 does not supports feature 'send_metadata' with feature 'storage_metadata_write_full_object_key'.",
config_prefix);
String object_key_compatibility_prefix = config.getString(config_prefix + ".key_compatibility_prefix", String());
String object_key_template = config.getString(config_prefix + ".key_template", String());
if (object_key_template.empty())
{
if (!object_key_compatibility_prefix.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Wrong configuration in {}. "
"Setting 'key_compatibility_prefix' can be defined only with setting 'key_template'.",
config_prefix);
return {uri.key, createObjectStorageKeysGeneratorByPrefix(uri.key)};
}
if (send_metadata)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Wrong configuration in {}. "
"s3 does not supports send_metadata with setting 'key_template'.",
config_prefix);
if (!storage_metadata_write_full_object_key)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Wrong configuration in {}. "
"Feature 'storage_metadata_write_full_object_key' has to be enabled in order to use setting 'key_template'.",
config_prefix);
if (!uri.key.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Wrong configuration in {}. "
"URI.key is forbidden with settings 'key_template', use setting 'key_compatibility_prefix' instead'. "
"URI.key: '{}', bucket: '{}'. ",
config_prefix,
uri.key, uri.bucket);
return {object_key_compatibility_prefix, createObjectStorageKeysGeneratorByTemplate(object_key_template)};
}
}
void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
@ -104,7 +158,8 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
{
String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
S3::URI uri(endpoint);
if (!uri.key.ends_with('/'))
// an empty key remains empty
if (!uri.key.empty() && !uri.key.ends_with('/'))
uri.key.push_back('/');
S3Capabilities s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
@ -113,6 +168,8 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
String type = config.getString(config_prefix + ".type");
chassert(type == "s3" || type == "s3_plain");
auto [object_key_compatibility_prefix, object_key_generator] = getPrefixAndKeyGenerator(type, uri, config, config_prefix);
MetadataStoragePtr metadata_storage;
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings);
@ -128,20 +185,18 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "s3_plain does not supports send_metadata");
s3_storage = std::make_shared<S3PlainObjectStorage>(
std::move(client), std::move(settings),
uri.version_id, s3_capabilities,
uri.bucket, uri.endpoint, uri.key, name);
metadata_storage = std::make_shared<MetadataStorageFromPlainObjectStorage>(s3_storage, uri.key);
std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint, object_key_generator, name);
metadata_storage = std::make_shared<MetadataStorageFromPlainObjectStorage>(s3_storage, object_key_compatibility_prefix);
}
else
{
s3_storage = std::make_shared<S3ObjectStorage>(
std::move(client), std::move(settings),
uri.version_id, s3_capabilities,
uri.bucket, uri.endpoint, uri.key, name);
std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint, object_key_generator, name);
auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context);
metadata_storage = std::make_shared<MetadataStorageFromDisk>(metadata_disk, uri.key);
metadata_storage = std::make_shared<MetadataStorageFromDisk>(metadata_disk, object_key_compatibility_prefix);
}
/// NOTE: should we still perform this check for clickhouse-disks?
@ -164,7 +219,7 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
DiskObjectStoragePtr s3disk = std::make_shared<DiskObjectStorage>(
name,
uri.key,
uri.key, /// might be empty
type == "s3" ? "DiskS3" : "DiskS3Plain",
std::move(metadata_storage),
std::move(s3_storage),

View File

@ -83,6 +83,10 @@ if (TARGET ch_contrib::sqids)
list (APPEND PRIVATE_LIBS ch_contrib::sqids)
endif()
if (TARGET ch_contrib::idna)
list (APPEND PRIVATE_LIBS ch_contrib::idna)
endif()
if (TARGET ch_contrib::h3)
list (APPEND PRIVATE_LIBS ch_contrib::h3)
endif()

202
src/Functions/idna.cpp Normal file
View File

@ -0,0 +1,202 @@
#include "config.h"
#if USE_IDNA
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wnewline-eof"
#endif
# include <ada/idna/to_ascii.h>
# include <ada/idna/to_unicode.h>
# include <ada/idna/unicode_transcoding.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NOT_IMPLEMENTED;
}
/// Implementation of
/// - idnaEncode(), tryIdnaEncode() and idnaDecode(), see https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode
/// and [3] https://www.unicode.org/reports/tr46/#ToUnicode
enum class ErrorHandling
{
Throw, /// Throw exception
Empty /// Return empty string
};
/// Translates a UTF-8 string (typically an Internationalized Domain Name for Applications, IDNA) to an ASCII-encoded equivalent. The
/// encoding is performed per domain component and based on Punycode with ASCII Compatible Encoding (ACE) prefix "xn--".
/// Example: "straße.münchen.de" --> "xn--strae-oqa.xn--mnchen-3ya.de"
/// Note: doesn't do percent decoding. Doesn't trim tabs, spaces or control characters. Expects non-empty inputs.
template <ErrorHandling error_handling>
struct IdnaEncode
{
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
size_t prev_offset = 0;
std::string ascii;
for (size_t row = 0; row < rows; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
std::string_view value_view(value, value_length);
if (!value_view.empty()) /// to_ascii() expects non-empty input
{
ascii = ada::idna::to_ascii(value_view);
const bool ok = !ascii.empty();
if (!ok)
{
if constexpr (error_handling == ErrorHandling::Throw)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to ASCII", value_view);
}
else
{
static_assert(error_handling == ErrorHandling::Empty);
ascii.clear();
}
}
}
res_data.insert(ascii.c_str(), ascii.c_str() + ascii.size() + 1);
res_offsets.push_back(res_data.size());
prev_offset = offsets[row];
ascii.clear();
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}
};
/// Translates an ASCII-encoded IDNA string back to its UTF-8 representation.
struct IdnaDecode
{
/// As per the specification, invalid inputs are returned as is, i.e. there is no special error handling.
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
size_t prev_offset = 0;
std::string unicode;
for (size_t row = 0; row < rows; ++row)
{
const char * ascii = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t ascii_length = offsets[row] - prev_offset - 1;
std::string_view ascii_view(ascii, ascii_length);
unicode = ada::idna::to_unicode(ascii_view);
res_data.insert(unicode.c_str(), unicode.c_str() + unicode.size() + 1);
res_offsets.push_back(res_data.size());
prev_offset = offsets[row];
unicode.clear();
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}
};
struct NameIdnaEncode { static constexpr auto name = "idnaEncode"; };
struct NameTryIdnaEncode { static constexpr auto name = "tryIdnaEncode"; };
struct NameIdnaDecode { static constexpr auto name = "idnaDecode"; };
using FunctionIdnaEncode = FunctionStringToString<IdnaEncode<ErrorHandling::Throw>, NameIdnaEncode>;
using FunctionTryIdnaEncode = FunctionStringToString<IdnaEncode<ErrorHandling::Empty>, NameTryIdnaEncode>;
using FunctionIdnaDecode = FunctionStringToString<IdnaDecode, NameIdnaDecode>;
REGISTER_FUNCTION(Idna)
{
factory.registerFunction<FunctionIdnaEncode>(FunctionDocumentation{
.description=R"(
Computes an ASCII representation of an Internationalized Domain Name. Throws an exception in case of error.)",
.syntax="idnaEncode(str)",
.arguments={{"str", "Input string"}},
.returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT idnaEncode('straße.münchen.de') AS ascii;",
R"(
ascii
xn--strae-oqa.xn--mnchen-3ya.de
)"
}}
});
factory.registerFunction<FunctionTryIdnaEncode>(FunctionDocumentation{
.description=R"(
Computes an ASCII representation of an Internationalized Domain Name. Returns an empty string in case of error.)",
.syntax="tryIdnaEncode(str)",
.arguments={{"str", "Input string"}},
.returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT idnaEncodeOrNull('München') AS ascii;",
R"(
ascii
xn--strae-oqa.xn--mnchen-3ya.de
)"
}}
});
factory.registerFunction<FunctionIdnaDecode>(FunctionDocumentation{
.description=R"(
Computes the Unicode representation of an ASCII-encoded Internationalized Domain Name.)",
.syntax="idnaDecode(str)",
.arguments={{"str", "Input string"}},
.returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de') AS unicode;",
R"(
unicode
straße.münchen.de
)"
}}
});
}
}
#endif
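
Outside of ClickHouse's function machinery, the underlying library calls used above can be exercised directly; a minimal sketch, assuming the same ada-url/idna headers (output shown in comments):

``` cpp
#include <ada/idna/to_ascii.h>
#include <ada/idna/to_unicode.h>
#include <iostream>

int main()
{
    std::string ascii = ada::idna::to_ascii("straße.münchen.de");
    std::cout << ascii << '\n';                        // xn--strae-oqa.xn--mnchen-3ya.de
    std::cout << ada::idna::to_unicode(ascii) << '\n'; // straße.münchen.de
}
```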

View File

@ -3,6 +3,7 @@
#include <Functions/keyvaluepair/impl/Configuration.h>
#include <base/find_symbols.h>
#include <iterator>
#include <vector>
namespace DB

View File

@ -434,7 +434,7 @@ public:
};
FunctionArgumentDescriptors optional_args{
{optional_argument_names[0], &isNumber<IDataType>, isColumnConst, "const Number"},
{optional_argument_names[0], &isNumber<IDataType>, nullptr, "const Number"},
{optional_argument_names[1], &isNumber<IDataType>, isColumnConst, "const Number"},
{optional_argument_names[2], &isString<IDataType>, isColumnConst, "const String"}
};

206
src/Functions/punycode.cpp Normal file
View File

@ -0,0 +1,206 @@
#include "config.h"
#if USE_IDNA
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wnewline-eof"
#endif
# include <ada/idna/punycode.h>
# include <ada/idna/unicode_transcoding.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NOT_IMPLEMENTED;
}
/// Implementation of
/// - punycodeEncode(), punycodeDecode() and tryPunycodeDecode(), see https://en.wikipedia.org/wiki/Punycode
enum class ErrorHandling
{
Throw, /// Throw exception
Empty /// Return empty string
};
struct PunycodeEncode
{
/// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, return undefined output, i.e. garbage-in, garbage-out.
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
size_t prev_offset = 0;
std::u32string value_utf32;
std::string value_puny;
for (size_t row = 0; row < rows; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
const size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length);
value_utf32.resize(value_utf32_length);
const size_t codepoints = ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
if (codepoints == 0)
value_utf32.clear(); /// input was empty or not valid UTF-8
const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
if (!ok)
value_puny.clear();
res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1);
res_offsets.push_back(res_data.size());
prev_offset = offsets[row];
value_utf32.clear();
value_puny.clear(); /// utf32_to_punycode() appends to its output string
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}
};
template <ErrorHandling error_handling>
struct PunycodeDecode
{
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
size_t prev_offset = 0;
std::u32string value_utf32;
std::string value_utf8;
for (size_t row = 0; row < rows; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
const std::string_view value_punycode(value, value_length);
const bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32);
if (!ok)
{
if constexpr (error_handling == ErrorHandling::Throw)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' is not a valid Punycode-encoded string", value_punycode);
}
else
{
static_assert(error_handling == ErrorHandling::Empty);
value_utf32.clear();
}
}
const size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size());
value_utf8.resize(utf8_length);
ada::idna::utf32_to_utf8(value_utf32.data(), value_utf32.size(), value_utf8.data());
res_data.insert(value_utf8.c_str(), value_utf8.c_str() + value_utf8.size() + 1);
res_offsets.push_back(res_data.size());
prev_offset = offsets[row];
value_utf32.clear(); /// punycode_to_utf32() appends to its output string
value_utf8.clear();
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}
};
struct NamePunycodeEncode { static constexpr auto name = "punycodeEncode"; };
struct NamePunycodeDecode { static constexpr auto name = "punycodeDecode"; };
struct NameTryPunycodeDecode { static constexpr auto name = "tryPunycodeDecode"; };
using FunctionPunycodeEncode = FunctionStringToString<PunycodeEncode, NamePunycodeEncode>;
using FunctionPunycodeDecode = FunctionStringToString<PunycodeDecode<ErrorHandling::Throw>, NamePunycodeDecode>;
using FunctionTryPunycodeDecode = FunctionStringToString<PunycodeDecode<ErrorHandling::Empty>, NameTryPunycodeDecode>;
REGISTER_FUNCTION(Punycode)
{
factory.registerFunction<FunctionPunycodeEncode>(FunctionDocumentation{
.description=R"(
Computes a Punycode representation of a string.)",
.syntax="punycodeEncode(str)",
.arguments={{"str", "Input string"}},
.returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT punycodeEncode('München') AS puny;",
R"(
puny
Mnchen-3ya
)"
}}
});
factory.registerFunction<FunctionPunycodeDecode>(FunctionDocumentation{
.description=R"(
Computes the plaintext (UTF-8) representation of a Punycode-encoded string. Throws an exception if the input is not valid Punycode.)",
.syntax="punycodeDecode(str)",
.arguments={{"str", "A Punycode-encoded string"}},
.returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT punycodeDecode('Mnchen-3ya') AS plain;",
R"(
plain
München
)"
}}
});
factory.registerFunction<FunctionTryPunycodeDecode>(FunctionDocumentation{
.description=R"(
Computes the plaintext (UTF-8) representation of a Punycode-encoded string. Returns an empty string if the input is not valid Punycode.)",
.syntax="tryPunycodeDecode(str)",
.arguments={{"str", "A Punycode-encoded string"}},
.returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT tryPunycodeDecode('Mnchen-3ya') AS plain;",
R"(
plain
München
)"
}}
});
}
}
#endif
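
The UTF-8/UTF-32/Punycode round trip performed row-by-row above can be sketched in isolation (same ada-url/idna calls as in the vector() implementations; output in comments):

``` cpp
#include <ada/idna/punycode.h>
#include <ada/idna/unicode_transcoding.h>
#include <iostream>
#include <string>

int main()
{
    std::string in = "München";
    std::u32string utf32(ada::idna::utf32_length_from_utf8(in.data(), in.size()), U'\0');
    ada::idna::utf8_to_utf32(in.data(), in.size(), utf32.data());

    std::string puny;
    if (ada::idna::utf32_to_punycode(utf32, puny))
        std::cout << puny << '\n'; // Mnchen-3ya

    std::u32string back;
    if (ada::idna::punycode_to_utf32(puny, back))
    {
        std::string out(ada::idna::utf8_length_from_utf32(back.data(), back.size()), '\0');
        ada::idna::utf32_to_utf8(back.data(), back.size(), out.data());
        std::cout << out << '\n'; // München
    }
}
```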

View File

@ -0,0 +1,238 @@
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
#pragma clang diagnostic ignored "-Wshadow"
#pragma clang diagnostic ignored "-Wimplicit-float-conversion"
#endif
#include <Functions/stl.hpp>
#ifdef __clang__
#pragma clang diagnostic pop
#endif
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
}
// Decompose time series data based on STL(Seasonal-Trend Decomposition Procedure Based on Loess)
class FunctionSeriesDecomposeSTL : public IFunction
{
public:
static constexpr auto name = "seriesDecomposeSTL";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSeriesDecomposeSTL>(); }
std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors args{
{"time_series", &isArray<IDataType>, nullptr, "Array"},
{"period", &isNativeUInt<IDataType>, nullptr, "Unsigned Integer"},
};
validateFunctionArgumentTypes(*this, arguments, args);
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat32>()));
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
{
ColumnPtr array_ptr = arguments[0].column;
const ColumnArray * array = checkAndGetColumn<ColumnArray>(array_ptr.get());
if (!array)
{
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(arguments[0].column.get());
if (!const_array)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
arguments[0].column->getName(), getName());
array_ptr = const_array->convertToFullColumn();
array = assert_cast<const ColumnArray *>(array_ptr.get());
}
const IColumn & src_data = array->getData();
const ColumnArray::Offsets & src_offsets = array->getOffsets();
auto res = ColumnFloat32::create();
auto & res_data = res->getData();
ColumnArray::ColumnOffsets::MutablePtr res_col_offsets = ColumnArray::ColumnOffsets::create();
auto & res_col_offsets_data = res_col_offsets->getData();
auto root_offsets = ColumnArray::ColumnOffsets::create();
auto & root_offsets_data = root_offsets->getData();
ColumnArray::Offset prev_src_offset = 0;
for (size_t i = 0; i < src_offsets.size(); ++i)
{
UInt64 period;
auto period_ptr = arguments[1].column->convertToFullColumnIfConst();
if (checkAndGetColumn<ColumnUInt8>(period_ptr.get())
|| checkAndGetColumn<ColumnUInt16>(period_ptr.get())
|| checkAndGetColumn<ColumnUInt32>(period_ptr.get())
|| checkAndGetColumn<ColumnUInt64>(period_ptr.get()))
period = period_ptr->getUInt(i);
else
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of second argument of function {}",
arguments[1].column->getName(),
getName());
std::vector<Float32> seasonal;
std::vector<Float32> trend;
std::vector<Float32> residue;
ColumnArray::Offset curr_offset = src_offsets[i];
if (executeNumber<UInt8>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue)
|| executeNumber<UInt16>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue)
|| executeNumber<UInt32>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue)
|| executeNumber<UInt64>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue)
|| executeNumber<Int8>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue)
|| executeNumber<Int16>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue)
|| executeNumber<Int32>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue)
|| executeNumber<Int64>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue)
|| executeNumber<Float32>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue)
|| executeNumber<Float64>(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue))
{
res_data.insert(seasonal.begin(), seasonal.end());
res_col_offsets_data.push_back(res_data.size());
res_data.insert(trend.begin(), trend.end());
res_col_offsets_data.push_back(res_data.size());
res_data.insert(residue.begin(), residue.end());
res_col_offsets_data.push_back(res_data.size());
root_offsets_data.push_back(res_col_offsets->size());
prev_src_offset = curr_offset;
}
else
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of first argument of function {}",
arguments[0].column->getName(),
getName());
}
ColumnArray::MutablePtr nested_array_col = ColumnArray::create(std::move(res), std::move(res_col_offsets));
return ColumnArray::create(std::move(nested_array_col), std::move(root_offsets));
}
template <typename T>
bool executeNumber(
const IColumn & src_data,
UInt64 period,
ColumnArray::Offset start,
ColumnArray::Offset end,
std::vector<Float32> & seasonal,
std::vector<Float32> & trend,
std::vector<Float32> & residue) const
{
const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&src_data);
if (!src_data_concrete)
return false;
const PaddedPODArray<T> & src_vec = src_data_concrete->getData();
chassert(start <= end);
size_t len = end - start;
if (len < 4)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least four data points are needed for function {}", getName());
if (period > (len / 2))
throw Exception(
ErrorCodes::BAD_ARGUMENTS, "The series should have data of at least two period lengths for function {}", getName());
std::vector<float> src(src_vec.begin() + start, src_vec.begin() + end);
auto res = stl::params().fit(src, period);
if (res.seasonal.empty())
return false;
seasonal = std::move(res.seasonal);
trend = std::move(res.trend);
residue = std::move(res.remainder);
return true;
}
};
REGISTER_FUNCTION(seriesDecomposeSTL)
{
factory.registerFunction<FunctionSeriesDecomposeSTL>(FunctionDocumentation{
.description = R"(
Decomposes a time series using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component.
**Syntax**
``` sql
seriesDecomposeSTL(series, period);
```
**Arguments**
- `series` - An array of numeric values
- `period` - A positive integer
The number of data points in `series` should be at least twice the value of `period`.
**Returned value**
- An array of three arrays where the first array contains the seasonal component, the second the trend, and the third the residual component.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT seriesDecomposeSTL([10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34], 3) AS print_0;
```
Result:
``` text
print_0
[[
-13.529999, -3.1799996, 16.71, -13.53, -3.1799996, 16.71, -13.53, -3.1799996,
16.71, -13.530001, -3.18, 16.710001, -13.530001, -3.1800003, 16.710001, -13.530001,
-3.1800003, 16.710001, -13.530001, -3.1799994, 16.71, -13.529999, -3.1799994, 16.709997
],
[
23.63, 23.63, 23.630003, 23.630001, 23.630001, 23.630001, 23.630001, 23.630001,
23.630001, 23.630001, 23.630001, 23.63, 23.630001, 23.630001, 23.63, 23.630001,
23.630001, 23.63, 23.630001, 23.630001, 23.630001, 23.630001, 23.630001, 23.630003
],
[
0, 0.0000019073486, -0.0000019073486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0000019073486, 0,
0
]]
```)",
.categories{"Time series analysis"}});
}
}

513
src/Functions/stl.hpp Normal file
View File

@ -0,0 +1,513 @@
// Dump of https://github.com/ankane/stl-cpp/blob/3b1b3a3e9335cda26c8b0797d8b8d24ac8e350ad/include/stl.hpp.
// Added to the ClickHouse source code and not referenced as a submodule because it's easier to maintain and modify/customize.
/*!
* STL C++ v0.1.3
* https://github.com/ankane/stl-cpp
* Unlicense OR MIT License
*
* Ported from https://www.netlib.org/a/stl
*
* Cleveland, R. B., Cleveland, W. S., McRae, J. E., & Terpenning, I. (1990).
* STL: A Seasonal-Trend Decomposition Procedure Based on Loess.
* Journal of Official Statistics, 6(1), 3-33.
*/
#pragma once
#include <algorithm>
#include <cmath>
#include <numeric>
#include <optional>
#include <stdexcept>
#include <vector>
namespace stl {
bool est(const float* y, size_t n, size_t len, int ideg, float xs, float* ys, size_t nleft, size_t nright, float* w, bool userw, const float* rw) {
auto range = ((float) n) - 1.0;
auto h = std::max(xs - ((float) nleft), ((float) nright) - xs);
if (len > n) {
h += (float) ((len - n) / 2);
}
auto h9 = 0.999 * h;
auto h1 = 0.001 * h;
// compute weights
auto a = 0.0;
for (auto j = nleft; j <= nright; j++) {
w[j - 1] = 0.0;
auto r = fabs(((float) j) - xs);
if (r <= h9) {
if (r <= h1) {
w[j - 1] = 1.0;
} else {
w[j - 1] = pow(1.0 - pow(r / h, 3), 3);
}
if (userw) {
w[j - 1] *= rw[j - 1];
}
a += w[j - 1];
}
}
if (a <= 0.0) {
return false;
} else { // weighted least squares
for (auto j = nleft; j <= nright; j++) { // make sum of w(j) == 1
w[j - 1] /= a;
}
if (h > 0.0 && ideg > 0) { // use linear fit
auto a = 0.0;
for (auto j = nleft; j <= nright; j++) { // weighted center of x values
a += w[j - 1] * ((float) j);
}
auto b = xs - a;
auto c = 0.0;
for (auto j = nleft; j <= nright; j++) {
c += w[j - 1] * pow(((float) j) - a, 2);
}
if (sqrt(c) > 0.001 * range) {
b /= c;
// points are spread out enough to compute slope
for (auto j = nleft; j <= nright; j++) {
w[j - 1] *= b * (((float) j) - a) + 1.0;
}
}
}
*ys = 0.0;
for (auto j = nleft; j <= nright; j++) {
*ys += w[j - 1] * y[j - 1];
}
return true;
}
}
void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool userw, const float* rw, float* ys, float* res) {
if (n < 2) {
ys[0] = y[0];
return;
}
size_t nleft = 0;
size_t nright = 0;
auto newnj = std::min(njump, n - 1);
if (len >= n) {
nleft = 1;
nright = n;
for (size_t i = 1; i <= n; i += newnj) {
auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
if (!ok) {
ys[i - 1] = y[i - 1];
}
}
} else if (newnj == 1) { // newnj equal to one, len less than n
auto nsh = (len + 1) / 2;
nleft = 1;
nright = len;
for (size_t i = 1; i <= n; i++) { // fitted value at i
if (i > nsh && nright != n) {
nleft += 1;
nright += 1;
}
auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
if (!ok) {
ys[i - 1] = y[i - 1];
}
}
} else { // newnj greater than one, len less than n
auto nsh = (len + 1) / 2;
for (size_t i = 1; i <= n; i += newnj) { // fitted value at i
if (i < nsh) {
nleft = 1;
nright = len;
} else if (i >= n - nsh + 1) {
nleft = n - len + 1;
nright = n;
} else {
nleft = i - nsh + 1;
nright = len + i - nsh;
}
auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
if (!ok) {
ys[i - 1] = y[i - 1];
}
}
}
if (newnj != 1) {
for (size_t i = 1; i <= n - newnj; i += newnj) {
auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj);
for (auto j = i + 1; j <= i + newnj - 1; j++) {
ys[j - 1] = ys[i - 1] + delta * ((float) (j - i));
}
}
auto k = ((n - 1) / newnj) * newnj + 1;
if (k != n) {
auto ok = est(y, n, len, ideg, (float) n, &ys[n - 1], nleft, nright, res, userw, rw);
if (!ok) {
ys[n - 1] = y[n - 1];
}
if (k != n - 1) {
auto delta = (ys[n - 1] - ys[k - 1]) / ((float) (n - k));
for (auto j = k + 1; j <= n - 1; j++) {
ys[j - 1] = ys[k - 1] + delta * ((float) (j - k));
}
}
}
}
}
void ma(const float* x, size_t n, size_t len, float* ave) {
auto newn = n - len + 1;
auto flen = (float) len;
auto v = 0.0;
// get the first average
for (size_t i = 0; i < len; i++) {
v += x[i];
}
ave[0] = v / flen;
if (newn > 1) {
auto k = len;
auto m = 0;
for (size_t j = 1; j < newn; j++) {
// window down the array
v = v - x[m] + x[k];
ave[j] = v / flen;
k += 1;
m += 1;
}
}
}
void fts(const float* x, size_t n, size_t np, float* trend, float* work) {
ma(x, n, np, trend);
ma(trend, n - np + 1, np, work);
ma(work, n - 2 * np + 2, 3, trend);
}
void rwts(const float* y, size_t n, const float* fit, float* rw) {
for (size_t i = 0; i < n; i++) {
rw[i] = fabs(y[i] - fit[i]);
}
auto mid1 = (n - 1) / 2;
auto mid2 = n / 2;
// sort
std::sort(rw, rw + n);
auto cmad = 3.0 * (rw[mid1] + rw[mid2]); // 6 * median abs resid
auto c9 = 0.999 * cmad;
auto c1 = 0.001 * cmad;
for (size_t i = 0; i < n; i++) {
auto r = fabs(y[i] - fit[i]);
if (r <= c1) {
rw[i] = 1.0;
} else if (r <= c9) {
rw[i] = pow(1.0 - pow(r / cmad, 2), 2);
} else {
rw[i] = 0.0;
}
}
}
void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) {
for (size_t j = 1; j <= np; j++) {
size_t k = (n - j) / np + 1;
for (size_t i = 1; i <= k; i++) {
work1[i - 1] = y[(i - 1) * np + j - 1];
}
if (userw) {
for (size_t i = 1; i <= k; i++) {
work3[i - 1] = rw[(i - 1) * np + j - 1];
}
}
ess(work1, k, ns, isdeg, nsjump, userw, work3, work2 + 1, work4);
auto xs = 0.0;
auto nright = std::min(ns, k);
auto ok = est(work1, k, ns, isdeg, xs, &work2[0], 1, nright, work4, userw, work3);
if (!ok) {
work2[0] = work2[1];
}
xs = k + 1;
size_t nleft = std::max(1, (int) k - (int) ns + 1);
ok = est(work1, k, ns, isdeg, xs, &work2[k + 1], nleft, k, work4, userw, work3);
if (!ok) {
work2[k + 1] = work2[k];
}
for (size_t m = 1; m <= k + 2; m++) {
season[(m - 1) * np + j - 1] = work2[m - 1];
}
}
}
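// onestp: runs ni iterations of the STL inner loop: detrend, smooth each
// cycle-subseries, low-pass filter the result, deseasonalize, re-smooth the trend.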
void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) {
for (size_t j = 0; j < ni; j++) {
for (size_t i = 0; i < n; i++) {
work1[i] = y[i] - trend[i];
}
ss(work1, n, np, ns, isdeg, nsjump, userw, rw, work2, work3, work4, work5, season);
fts(work2, n + 2 * np, np, work3, work1);
ess(work3, n, nl, ildeg, nljump, false, work4, work1, work5);
for (size_t i = 0; i < n; i++) {
season[i] = work2[np + i] - work1[i];
}
for (size_t i = 0; i < n; i++) {
work1[i] = y[i] - season[i];
}
ess(work1, n, nt, itdeg, ntjump, userw, rw, trend, work3);
}
}
void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) {
if (ns < 3) {
throw std::invalid_argument("seasonal_length must be at least 3");
}
if (nt < 3) {
throw std::invalid_argument("trend_length must be at least 3");
}
if (nl < 3) {
throw std::invalid_argument("low_pass_length must be at least 3");
}
if (np < 2) {
throw std::invalid_argument("period must be at least 2");
}
if (isdeg != 0 && isdeg != 1) {
throw std::invalid_argument("seasonal_degree must be 0 or 1");
}
if (itdeg != 0 && itdeg != 1) {
throw std::invalid_argument("trend_degree must be 0 or 1");
}
if (ildeg != 0 && ildeg != 1) {
throw std::invalid_argument("low_pass_degree must be 0 or 1");
}
if (ns % 2 != 1) {
throw std::invalid_argument("seasonal_length must be odd");
}
if (nt % 2 != 1) {
throw std::invalid_argument("trend_length must be odd");
}
if (nl % 2 != 1) {
throw std::invalid_argument("low_pass_length must be odd");
}
auto work1 = std::vector<float>(n + 2 * np);
auto work2 = std::vector<float>(n + 2 * np);
auto work3 = std::vector<float>(n + 2 * np);
auto work4 = std::vector<float>(n + 2 * np);
auto work5 = std::vector<float>(n + 2 * np);
auto userw = false;
size_t k = 0;
while (true) {
onestp(y, n, np, ns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, userw, rw, season, trend, work1.data(), work2.data(), work3.data(), work4.data(), work5.data());
k += 1;
if (k > no) {
break;
}
for (size_t i = 0; i < n; i++) {
work1[i] = trend[i] + season[i];
}
rwts(y, n, work1.data(), rw);
userw = true;
}
if (no <= 0) {
for (size_t i = 0; i < n; i++) {
rw[i] = 1.0;
}
}
}
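// var: unbiased sample variance of series.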
float var(const std::vector<float>& series) {
auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size();
std::vector<float> tmp;
tmp.reserve(series.size());
for (auto v : series) {
tmp.push_back(pow(v - mean, 2));
}
return std::accumulate(tmp.begin(), tmp.end(), 0.0) / (series.size() - 1);
}
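// strength: max(0, 1 - Var(remainder) / Var(component + remainder)); near 1 means a strong component.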
float strength(const std::vector<float>& component, const std::vector<float>& remainder) {
std::vector<float> sr;
sr.reserve(remainder.size());
for (size_t i = 0; i < remainder.size(); i++) {
sr.push_back(component[i] + remainder[i]);
}
return std::max(0.0, 1.0 - var(remainder) / var(sr));
}
class StlResult {
public:
std::vector<float> seasonal;
std::vector<float> trend;
std::vector<float> remainder;
std::vector<float> weights;
inline float seasonal_strength() {
return strength(seasonal, remainder);
}
inline float trend_strength() {
return strength(trend, remainder);
}
};
class StlParams {
std::optional<size_t> ns_ = std::nullopt;
std::optional<size_t> nt_ = std::nullopt;
std::optional<size_t> nl_ = std::nullopt;
int isdeg_ = 0;
int itdeg_ = 1;
std::optional<int> ildeg_ = std::nullopt;
std::optional<size_t> nsjump_ = std::nullopt;
std::optional<size_t> ntjump_ = std::nullopt;
std::optional<size_t> nljump_ = std::nullopt;
std::optional<size_t> ni_ = std::nullopt;
std::optional<size_t> no_ = std::nullopt;
bool robust_ = false;
public:
inline StlParams seasonal_length(size_t ns) {
this->ns_ = ns;
return *this;
}
inline StlParams trend_length(size_t nt) {
this->nt_ = nt;
return *this;
}
inline StlParams low_pass_length(size_t nl) {
this->nl_ = nl;
return *this;
}
inline StlParams seasonal_degree(int isdeg) {
this->isdeg_ = isdeg;
return *this;
}
inline StlParams trend_degree(int itdeg) {
this->itdeg_ = itdeg;
return *this;
}
inline StlParams low_pass_degree(int ildeg) {
this->ildeg_ = ildeg;
return *this;
}
inline StlParams seasonal_jump(size_t nsjump) {
this->nsjump_ = nsjump;
return *this;
}
inline StlParams trend_jump(size_t ntjump) {
this->ntjump_ = ntjump;
return *this;
}
inline StlParams low_pass_jump(size_t nljump) {
this->nljump_ = nljump;
return *this;
}
inline StlParams inner_loops(size_t ni) {
this->ni_ = ni;
return *this;
}
inline StlParams outer_loops(size_t no) {
this->no_ = no;
return *this;
}
inline StlParams robust(bool robust) {
this->robust_ = robust;
return *this;
}
StlResult fit(const float* y, size_t n, size_t np);
StlResult fit(const std::vector<float>& y, size_t np);
};
StlParams params() {
return StlParams();
}
StlResult StlParams::fit(const float* y, size_t n, size_t np) {
if (n < 2 * np) {
throw std::invalid_argument("series has less than two periods");
}
auto ns = this->ns_.value_or(np);
auto isdeg = this->isdeg_;
auto itdeg = this->itdeg_;
auto res = StlResult {
std::vector<float>(n),
std::vector<float>(n),
std::vector<float>(),
std::vector<float>(n)
};
auto ildeg = this->ildeg_.value_or(itdeg);
auto newns = std::max(ns, (size_t) 3);
if (newns % 2 == 0) {
newns += 1;
}
auto newnp = std::max(np, (size_t) 2);
auto nt = (size_t) ceil((1.5 * newnp) / (1.0 - 1.5 / (float) newns));
nt = this->nt_.value_or(nt);
nt = std::max(nt, (size_t) 3);
if (nt % 2 == 0) {
nt += 1;
}
auto nl = this->nl_.value_or(newnp);
if (nl % 2 == 0 && !this->nl_.has_value()) {
nl += 1;
}
auto ni = this->ni_.value_or(this->robust_ ? 1 : 2);
auto no = this->no_.value_or(this->robust_ ? 15 : 0);
auto nsjump = this->nsjump_.value_or((size_t) ceil(((float) newns) / 10.0));
auto ntjump = this->ntjump_.value_or((size_t) ceil(((float) nt) / 10.0));
auto nljump = this->nljump_.value_or((size_t) ceil(((float) nl) / 10.0));
stl(y, n, newnp, newns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, res.weights.data(), res.seasonal.data(), res.trend.data());
res.remainder.reserve(n);
for (size_t i = 0; i < n; i++) {
res.remainder.push_back(y[i] - res.seasonal[i] - res.trend[i]);
}
return res;
}
StlResult StlParams::fit(const std::vector<float>& y, size_t np) {
return StlParams::fit(y.data(), y.size(), np);
}
}
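For orientation, a minimal usage sketch of the builder API above. The enclosing namespace is not visible in this excerpt, so `stl` is assumed here, and the series values are illustrative only:

#include <vector>

// Decompose a short series with period 7; robust(true) enables the outer loop
// with bisquare weights. (The `stl` namespace is an assumption.)
std::vector<float> series = {5, 9, 2, 9, 0, 6, 3, 8, 5, 8, 8, 0, 8, 3};
auto res = stl::params().robust(true).fit(series, 7);
// res.seasonal, res.trend and res.remainder each hold series.size() values;
// res.seasonal_strength() returns a value in [0, 1].
float s = res.seasonal_strength();

Note that fit() throws for fewer than two full periods, so a period of 7 needs at least 14 points, as here.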

View File

@ -170,7 +170,7 @@ std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
}
std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
std::unique_ptr<WriteBuffer> nested, CompressionMethod method, int level, size_t buf_size, char * existing_memory, size_t alignment)
std::unique_ptr<WriteBuffer> nested, CompressionMethod method, int level, int zstd_window_log, size_t buf_size, char * existing_memory, size_t alignment)
{
if (method == DB::CompressionMethod::Gzip || method == CompressionMethod::Zlib)
return std::make_unique<ZlibDeflatingWriteBuffer>(std::move(nested), method, level, buf_size, existing_memory, alignment);
@ -183,7 +183,7 @@ std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
return std::make_unique<LZMADeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
if (method == CompressionMethod::Zstd)
return std::make_unique<ZstdDeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
return std::make_unique<ZstdDeflatingWriteBuffer>(std::move(nested), level, zstd_window_log, buf_size, existing_memory, alignment);
if (method == CompressionMethod::Lz4)
return std::make_unique<Lz4DeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);

View File

@ -66,6 +66,7 @@ std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
std::unique_ptr<WriteBuffer> nested,
CompressionMethod method,
int level,
int zstd_window_log = 0,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0);

View File

@ -3,7 +3,6 @@
#if USE_AWS_S3
#include <aws/core/client/CoreErrors.h>
#include <aws/core/client/DefaultRetryStrategy.h>
#include <aws/s3/model/HeadBucketRequest.h>
#include <aws/s3/model/GetObjectRequest.h>
#include <aws/s3/model/HeadObjectRequest.h>
@ -15,7 +14,6 @@
#include <Poco/Net/NetException.h>
#include <IO/S3Common.h>
#include <IO/S3/Requests.h>
#include <IO/S3/PocoHTTPClientFactory.h>
#include <IO/S3/AWSLogger.h>
@ -37,6 +35,9 @@ namespace ProfileEvents
extern const Event DiskS3WriteRequestsErrors;
extern const Event DiskS3ReadRequestsErrors;
extern const Event S3Clients;
extern const Event TinyS3Clients;
}
namespace DB
@ -199,6 +200,8 @@ Client::Client(
cache = std::make_shared<ClientCache>();
ClientCacheRegistry::instance().registerClient(cache);
ProfileEvents::increment(ProfileEvents::S3Clients);
}
Client::Client(
@ -219,6 +222,22 @@ Client::Client(
{
cache = std::make_shared<ClientCache>(*other.cache);
ClientCacheRegistry::instance().registerClient(cache);
ProfileEvents::increment(ProfileEvents::TinyS3Clients);
}
Client::~Client()
{
try
{
ClientCacheRegistry::instance().unregisterClient(cache.get());
}
catch (...)
{
tryLogCurrentException(log);
throw;
}
}
Aws::Auth::AWSCredentials Client::getCredentials() const

View File

@ -142,18 +142,7 @@ public:
Client(Client && other) = delete;
Client & operator=(Client &&) = delete;
~Client() override
{
try
{
ClientCacheRegistry::instance().unregisterClient(cache.get());
}
catch (...)
{
tryLogCurrentException(log);
throw;
}
}
~Client() override;
/// Returns the initial endpoint.
const String & getInitialEndpoint() const { return initial_endpoint; }
@ -170,7 +159,7 @@ public:
class RetryStrategy : public Aws::Client::RetryStrategy
{
public:
RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 90000);
explicit RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 90000);
/// NOLINTNEXTLINE(google-runtime-int)
bool ShouldRetry(const Aws::Client::AWSError<Aws::Client::CoreErrors>& error, long attemptedRetries) const override;

View File

@ -6,21 +6,12 @@
#if USE_AWS_S3
# include <Common/quoteString.h>
# include <IO/WriteBufferFromString.h>
# include <IO/HTTPHeaderEntries.h>
# include <Storages/StorageS3Settings.h>
# include <IO/S3/PocoHTTPClientFactory.h>
# include <IO/S3/PocoHTTPClient.h>
# include <IO/S3/Client.h>
# include <IO/S3/URI.h>
# include <IO/S3/Requests.h>
# include <IO/S3/Credentials.h>
# include <Common/quoteString.h>
# include <Common/logger_useful.h>
# include <fstream>
namespace ProfileEvents
{
@ -147,6 +138,12 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const
};
}
bool AuthSettings::hasUpdates(const AuthSettings & other) const
{
AuthSettings copy = *this;
copy.updateFrom(other);
return *this != copy;
}
void AuthSettings::updateFrom(const AuthSettings & from)
{
@ -175,7 +172,7 @@ void AuthSettings::updateFrom(const AuthSettings & from)
expiration_window_seconds = from.expiration_window_seconds;
if (from.no_sign_request.has_value())
no_sign_request = *from.no_sign_request;
no_sign_request = from.no_sign_request;
}
}

View File

@ -92,9 +92,11 @@ struct AuthSettings
std::optional<uint64_t> expiration_window_seconds;
std::optional<bool> no_sign_request;
bool operator==(const AuthSettings & other) const = default;
bool hasUpdates(const AuthSettings & other) const;
void updateFrom(const AuthSettings & from);
private:
bool operator==(const AuthSettings & other) const = default;
};
}

View File

@ -1,30 +1,51 @@
#include <IO/ZstdDeflatingWriteBuffer.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ZSTD_ENCODER_FAILED;
extern const int ILLEGAL_CODEC_PARAMETER;
}
static void setZstdParameter(ZSTD_CCtx * cctx, ZSTD_cParameter param, int value)
{
auto ret = ZSTD_CCtx_setParameter(cctx, param, value);
if (ZSTD_isError(ret))
throw Exception(
ErrorCodes::ZSTD_ENCODER_FAILED,
"zstd stream encoder option setting failed: error code: {}; zstd version: {}",
ret,
ZSTD_VERSION_STRING);
}
ZstdDeflatingWriteBuffer::ZstdDeflatingWriteBuffer(
std::unique_ptr<WriteBuffer> out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
std::unique_ptr<WriteBuffer> out_, int compression_level, int window_log, size_t buf_size, char * existing_memory, size_t alignment)
: WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment)
{
cctx = ZSTD_createCCtx();
if (cctx == nullptr)
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder init failed: zstd version: {}", ZSTD_VERSION_STRING);
size_t ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compression_level);
if (ZSTD_isError(ret))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED,
"zstd stream encoder option setting failed: error code: {}; zstd version: {}",
ret, ZSTD_VERSION_STRING);
ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
if (ZSTD_isError(ret))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED,
"zstd stream encoder option setting failed: error code: {}; zstd version: {}",
ret, ZSTD_VERSION_STRING);
setZstdParameter(cctx, ZSTD_c_compressionLevel, compression_level);
if (window_log > 0)
{
ZSTD_bounds window_log_bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
if (ZSTD_isError(window_log_bounds.error))
throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "ZSTD windowLog parameter is not supported {}",
std::string(ZSTD_getErrorName(window_log_bounds.error)));
if (window_log > window_log_bounds.upperBound || window_log < window_log_bounds.lowerBound)
throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER,
"ZSTD codec can't have window log more than {} and lower than {}, given {}",
toString(window_log_bounds.upperBound),
toString(window_log_bounds.lowerBound), toString(window_log));
setZstdParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1);
setZstdParameter(cctx, ZSTD_c_windowLog, window_log);
}
setZstdParameter(cctx, ZSTD_c_checksumFlag, 1);
input = {nullptr, 0, 0};
output = {nullptr, 0, 0};
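For reference, the window-log handling above corresponds to the following setup in the public libzstd API; a standalone sketch (compression level 3 and window log 27 are arbitrary example values, error checking elided):

#include <zstd.h>

ZSTD_CCtx * cctx = ZSTD_createCCtx();
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
// A non-zero window log enables long-range matching over a (1 << windowLog)-byte window.
ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 27); // example: 128 MiB window
ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);

Decoders of such streams may need to raise ZSTD_d_windowLogMax on the decompression context to accept windows above the default limit.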

View File

@ -17,6 +17,7 @@ public:
ZstdDeflatingWriteBuffer(
std::unique_ptr<WriteBuffer> out_,
int compression_level,
int window_log = 0,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0);

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterCreateQuotaQuery.h>
#include <Access/AccessControl.h>
@ -160,4 +161,13 @@ void InterpreterCreateQuotaQuery::updateQuotaFromQuery(Quota & quota, const ASTC
updateQuotaFromQueryImpl(quota, query, {}, {});
}
void registerInterpreterCreateQuotaQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterCreateQuotaQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterCreateQuotaQuery", create_fn);
}
}
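Dozens of near-identical register* functions follow in this diff. The InterpreterFactory declaration itself is not part of this excerpt; a self-contained sketch of the registry shape these calls assume (Query, Context and tryCreate are stand-ins, not ClickHouse names):

#include <functional>
#include <memory>
#include <string>
#include <unordered_map>

struct Query {};    // stand-in for the real AST type
struct Context {};  // stand-in for the real context type
struct IInterpreter { virtual ~IInterpreter() = default; };

class InterpreterFactory
{
public:
    struct Arguments { Query & query; Context & context; };
    using CreateFn = std::function<std::unique_ptr<IInterpreter>(const Arguments &)>;

    // Each register* function above stores one name -> constructor mapping.
    void registerInterpreter(const std::string & name, CreateFn fn) { registry.emplace(name, std::move(fn)); }

    std::unique_ptr<IInterpreter> tryCreate(const std::string & name, const Arguments & args) const
    {
        auto it = registry.find(name);
        return it == registry.end() ? nullptr : it->second(args);
    }

private:
    std::unordered_map<std::string, CreateFn> registry;
};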

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterCreateRoleQuery.h>
#include <Access/AccessControl.h>
@ -122,4 +123,14 @@ void InterpreterCreateRoleQuery::updateRoleFromQuery(Role & role, const ASTCreat
{
updateRoleFromQueryImpl(role, query, {}, {});
}
void registerInterpreterCreateRoleQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterCreateRoleQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterCreateRoleQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterCreateRowPolicyQuery.h>
#include <Access/AccessControl.h>
@ -148,4 +149,13 @@ AccessRightsElements InterpreterCreateRowPolicyQuery::getRequiredAccess() const
return res;
}
void registerInterpreterCreateRowPolicyQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterCreateRowPolicyQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterCreateRowPolicyQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterCreateSettingsProfileQuery.h>
#include <Access/AccessControl.h>
@ -138,4 +139,14 @@ void InterpreterCreateSettingsProfileQuery::updateSettingsProfileFromQuery(Setti
{
updateSettingsProfileFromQueryImpl(SettingsProfile, query, {}, {}, {});
}
void registerInterpreterCreateSettingsProfileQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterCreateSettingsProfileQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterCreateSettingsProfileQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterCreateUserQuery.h>
#include <Access/AccessControl.h>
@ -261,4 +262,13 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat
updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, {}, allow_no_password, allow_plaintext_password, true);
}
void registerInterpreterCreateUserQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterCreateUserQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterCreateUserQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterDropAccessEntityQuery.h>
#include <Access/AccessControl.h>
@ -95,4 +96,13 @@ AccessRightsElements InterpreterDropAccessEntityQuery::getRequiredAccess() const
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by DROP query", toString(query.type));
}
void registerInterpreterDropAccessEntityQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterDropAccessEntityQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterDropAccessEntityQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterGrantQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
#include <Parsers/Access/ASTRolesOrUsersSet.h>
@ -480,4 +481,13 @@ void InterpreterGrantQuery::updateRoleFromQuery(Role & role, const ASTGrantQuery
updateFromQuery(role, query);
}
void registerInterpreterGrantQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterGrantQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterGrantQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterMoveAccessEntityQuery.h>
#include <Parsers/Access/ASTMoveAccessEntityQuery.h>
#include <Parsers/Access/ASTRowPolicyName.h>
@ -90,4 +91,13 @@ AccessRightsElements InterpreterMoveAccessEntityQuery::getRequiredAccess() const
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by DROP query", toString(query.type));
}
void registerInterpreterMoveAccessEntityQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterMoveAccessEntityQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterMoveAccessEntityQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterSetRoleQuery.h>
#include <Parsers/Access/ASTSetRoleQuery.h>
#include <Parsers/Access/ASTRolesOrUsersSet.h>
@ -90,4 +91,13 @@ void InterpreterSetRoleQuery::updateUserSetDefaultRoles(User & user, const Roles
user.default_roles = roles_from_query;
}
void registerInterpreterSetRoleQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterSetRoleQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterSetRoleQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterShowAccessEntitiesQuery.h>
#include <Parsers/Access/ASTShowAccessEntitiesQuery.h>
#include <Parsers/formatAST.h>
@ -125,4 +126,13 @@ String InterpreterShowAccessEntitiesQuery::getRewrittenQuery() const
(order.empty() ? "" : " ORDER BY " + order);
}
void registerInterpreterShowAccessEntitiesQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterShowAccessEntitiesQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterShowAccessEntitiesQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterShowAccessQuery.h>
#include <Parsers/formatAST.h>
@ -80,4 +81,13 @@ ASTs InterpreterShowAccessQuery::getCreateAndGrantQueries() const
return result;
}
void registerInterpreterShowAccessQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterShowAccessQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterShowAccessQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterShowCreateAccessEntityQuery.h>
#include <Interpreters/formatWithPossiblyHidingSecrets.h>
#include <Parsers/Access/ASTShowCreateAccessEntityQuery.h>
@ -420,4 +421,14 @@ AccessRightsElements InterpreterShowCreateAccessEntityQuery::getRequiredAccess()
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by SHOW CREATE query", toString(show_query.type));
}
void registerInterpreterShowCreateAccessEntityQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterShowCreateAccessEntityQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterShowCreateAccessEntityQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterShowGrantsQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
#include <Parsers/Access/ASTRolesOrUsersSet.h>
@ -198,4 +199,13 @@ ASTs InterpreterShowGrantsQuery::getAttachGrantQueries(const IAccessEntity & use
return getGrantQueriesImpl(user_or_role, nullptr, true);
}
void registerInterpreterShowGrantsQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterShowGrantsQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterShowGrantsQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/Access/InterpreterShowPrivilegesQuery.h>
#include <Interpreters/executeQuery.h>
@ -15,4 +16,14 @@ BlockIO InterpreterShowPrivilegesQuery::execute()
return executeQuery("SELECT * FROM system.privileges", context, QueryFlags{ .internal = true }).second;
}
void registerInterpreterShowPrivilegesQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterShowPrivilegesQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterShowPrivilegesQuery", create_fn);
}
}

View File

@ -1,8 +1,9 @@
#pragma once
#include <functional>
#include <string>
#include <Core/Defines.h>
#include <Interpreters/Cache/FileCache_fwd.h>
#include <string>
namespace Poco { namespace Util { class AbstractConfiguration; } } // NOLINT(cppcoreguidelines-virtual-class-destructor)

View File

@ -46,6 +46,9 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<Tabl
auto inner_hash_join = std::make_shared<InternalHashJoin>();
inner_hash_join->data = std::make_unique<HashJoin>(table_join_, right_sample_block, any_take_last_row_, 0, fmt::format("concurrent{}", i));
/// Non zero `max_joined_block_rows` allows to process block partially and return not processed part.
/// TODO: It's not handled properly in ConcurrentHashJoin case, so we set it to 0 to disable this feature.
inner_hash_join->data->setMaxJoinedBlockRows(0);
hash_joins.emplace_back(std::move(inner_hash_join));
}
}

View File

@ -1583,9 +1583,7 @@ bool Context::hasScalar(const String & name) const
void Context::addQueryAccessInfo(
const String & quoted_database_name,
const String & full_quoted_table_name,
const Names & column_names,
const String & projection_name,
const String & view_name)
const Names & column_names)
{
if (isGlobalContext())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");
@ -1593,12 +1591,9 @@ void Context::addQueryAccessInfo(
std::lock_guard lock(query_access_info.mutex);
query_access_info.databases.emplace(quoted_database_name);
query_access_info.tables.emplace(full_quoted_table_name);
for (const auto & column_name : column_names)
query_access_info.columns.emplace(full_quoted_table_name + "." + backQuoteIfNeed(column_name));
if (!projection_name.empty())
query_access_info.projections.emplace(full_quoted_table_name + "." + backQuoteIfNeed(projection_name));
if (!view_name.empty())
query_access_info.views.emplace(view_name);
}
void Context::addQueryAccessInfo(const Names & partition_names)
@ -1611,6 +1606,15 @@ void Context::addQueryAccessInfo(const Names & partition_names)
query_access_info.partitions.emplace(partition_name);
}
void Context::addViewAccessInfo(const String & view_name)
{
if (isGlobalContext())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");
std::lock_guard<std::mutex> lock(query_access_info.mutex);
query_access_info.views.emplace(view_name);
}
void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name)
{
if (!qualified_projection_name)

View File

@ -693,13 +693,14 @@ public:
void addSpecialScalar(const String & name, const Block & block);
const QueryAccessInfo & getQueryAccessInfo() const { return query_access_info; }
void addQueryAccessInfo(
const String & quoted_database_name,
const String & full_quoted_table_name,
const Names & column_names,
const String & projection_name = {},
const String & view_name = {});
const Names & column_names);
void addQueryAccessInfo(const Names & partition_names);
void addViewAccessInfo(const String & view_name);
struct QualifiedProjectionName
{
@ -707,8 +708,8 @@ public:
String projection_name;
explicit operator bool() const { return !projection_name.empty(); }
};
void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name);
void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name);
/// Supported factories for records in query_log
enum class QueryLogFactories

View File

@ -1050,7 +1050,7 @@ static std::unique_ptr<QueryPlan> buildJoinedPlan(
join_element.table_expression,
context,
original_right_column_names,
query_options.copy().setWithAllColumns().ignoreProjections(false).ignoreAlias(false));
query_options.copy().setWithAllColumns().ignoreAlias(false));
auto joined_plan = std::make_unique<QueryPlan>();
interpreter->buildQueryPlan(*joined_plan);
{

View File

@ -243,6 +243,7 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
, asof_inequality(table_join->getAsofInequality())
, data(std::make_shared<RightTableData>())
, right_sample_block(right_sample_block_)
, max_joined_block_rows(table_join->maxJoinedBlockRows())
, instance_log_id(!instance_id_.empty() ? "(" + instance_id_ + ") " : "")
, log(&Poco::Logger::get("HashJoin"))
{
@ -1401,7 +1402,7 @@ NO_INLINE size_t joinRightColumns(
{
if constexpr (join_features.need_replication)
{
if (unlikely(current_offset > max_joined_block_rows))
if (unlikely(current_offset >= max_joined_block_rows))
{
added_columns.offsets_to_replicate->resize_assume_reserved(i);
added_columns.filter.resize_assume_reserved(i);
@ -1690,7 +1691,7 @@ Block HashJoin::joinBlockImpl(
bool has_required_right_keys = (required_right_keys.columns() != 0);
added_columns.need_filter = join_features.need_filter || has_required_right_keys;
added_columns.max_joined_block_rows = table_join->maxJoinedBlockRows();
added_columns.max_joined_block_rows = max_joined_block_rows;
if (!added_columns.max_joined_block_rows)
added_columns.max_joined_block_rows = std::numeric_limits<size_t>::max();
else
@ -1771,7 +1772,6 @@ Block HashJoin::joinBlockImpl(
void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const
{
size_t max_joined_block_rows = table_join->maxJoinedBlockRows();
size_t start_left_row = 0;
size_t start_right_block = 0;
if (not_processed)

View File

@ -396,6 +396,8 @@ public:
void shrinkStoredBlocksToFit(size_t & total_bytes_in_join);
void setMaxJoinedBlockRows(size_t value) { max_joined_block_rows = value; }
private:
template<bool> friend class NotJoinedHash;
@ -433,6 +435,9 @@ private:
/// Left table column names that are sources for required_right_keys columns
std::vector<String> required_right_keys_sources;
/// Maximum number of rows in result block. If it is 0, then no limits.
size_t max_joined_block_rows = 0;
/// When tracked memory consumption is more than a threshold, we will shrink to fit stored blocks.
bool shrink_blocks = false;
Int64 memory_usage_before_adding_blocks = 0;

View File

@ -1,4 +1,5 @@
#include <Interpreters/InterpreterAlterNamedCollectionQuery.h>
#include <Interpreters/InterpreterFactory.h>
#include <Parsers/ASTAlterNamedCollectionQuery.h>
#include <Access/ContextAccess.h>
#include <Interpreters/Context.h>
@ -26,4 +27,13 @@ BlockIO InterpreterAlterNamedCollectionQuery::execute()
return {};
}
void registerInterpreterAlterNamedCollectionQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterAlterNamedCollectionQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterAlterNamedCollectionQuery", create_fn);
}
}

View File

@ -1,4 +1,5 @@
#include <Interpreters/InterpreterAlterQuery.h>
#include <Interpreters/InterpreterFactory.h>
#include <Access/Common/AccessRightsElement.h>
#include <Databases/DatabaseFactory.h>
@ -535,4 +536,13 @@ void InterpreterAlterQuery::extendQueryLogElemImpl(QueryLogElement & elem, const
}
}
void registerInterpreterAlterQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterAlterQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterAlterQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterBackupQuery.h>
#include <Backups/BackupsWorker.h>
@ -47,4 +48,13 @@ BlockIO InterpreterBackupQuery::execute()
return res_io;
}
void registerInterpreterBackupQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterBackupQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterBackupQuery", create_fn);
}
}

View File

@ -1,4 +1,5 @@
#include <Interpreters/InterpreterCheckQuery.h>
#include <Interpreters/InterpreterFactory.h>
#include <algorithm>
@ -472,4 +473,13 @@ BlockIO InterpreterCheckQuery::execute()
return res;
}
void registerInterpreterCheckQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterCheckQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterCheckQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterCreateFunctionQuery.h>
#include <Access/ContextAccess.h>
@ -53,4 +54,13 @@ BlockIO InterpreterCreateFunctionQuery::execute()
return {};
}
void registerInterpreterCreateFunctionQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterCreateFunctionQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterCreateFunctionQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterCreateIndexQuery.h>
#include <Access/ContextAccess.h>
@ -99,4 +100,13 @@ BlockIO InterpreterCreateIndexQuery::execute()
return {};
}
void registerInterpreterCreateIndexQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterCreateIndexQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterCreateIndexQuery", create_fn);
}
}

View File

@ -1,4 +1,5 @@
#include <Interpreters/InterpreterCreateNamedCollectionQuery.h>
#include <Interpreters/InterpreterFactory.h>
#include <Parsers/ASTCreateNamedCollectionQuery.h>
#include <Access/ContextAccess.h>
#include <Interpreters/Context.h>
@ -26,4 +27,13 @@ BlockIO InterpreterCreateNamedCollectionQuery::execute()
return {};
}
void registerInterpreterCreateNamedCollectionQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterCreateNamedCollectionQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterCreateNamedCollectionQuery", create_fn);
}
}

View File

@ -40,6 +40,7 @@
#include <Interpreters/executeQuery.h>
#include <Interpreters/DDLTask.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
@ -1896,4 +1897,13 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr
}
}
void registerInterpreterCreateQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterCreateQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterCreateQuery", create_fn);
}
}

View File

@ -1,4 +1,5 @@
#include <Interpreters/InterpreterDeleteQuery.h>
#include <Interpreters/InterpreterFactory.h>
#include <Access/ContextAccess.h>
#include <Databases/DatabaseReplicated.h>
@ -110,4 +111,13 @@ BlockIO InterpreterDeleteQuery::execute()
}
}
void registerInterpreterDeleteQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterDeleteQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterDeleteQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterDescribeCacheQuery.h>
#include <Interpreters/Context.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
@ -68,4 +69,13 @@ BlockIO InterpreterDescribeCacheQuery::execute()
return res;
}
void registerInterpreterDescribeCacheQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterDescribeCacheQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterDescribeCacheQuery", create_fn);
}
}

View File

@ -9,6 +9,7 @@
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterDescribeQuery.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Access/Common/AccessFlags.h>
@ -271,4 +272,13 @@ void InterpreterDescribeQuery::addSubcolumns(const ColumnDescription & column, b
}, ISerialization::SubstreamData(type->getDefaultSerialization()).withType(type));
}
void registerInterpreterDescribeQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterDescribeQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterDescribeQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterDropFunctionQuery.h>
#include <Access/ContextAccess.h>
@ -49,4 +50,13 @@ BlockIO InterpreterDropFunctionQuery::execute()
return {};
}
void registerInterpreterDropFunctionQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterDropFunctionQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterDropFunctionQuery", create_fn);
}
}

View File

@ -2,6 +2,7 @@
#include <Databases/DatabaseReplicated.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterDropIndexQuery.h>
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Parsers/ASTDropIndexQuery.h>
#include <Parsers/ASTIdentifier.h>
@ -68,4 +69,13 @@ BlockIO InterpreterDropIndexQuery::execute()
return {};
}
void registerInterpreterDropIndexQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterDropIndexQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterDropIndexQuery", create_fn);
}
}

View File

@ -1,3 +1,4 @@
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterDropNamedCollectionQuery.h>
#include <Parsers/ASTDropNamedCollectionQuery.h>
#include <Access/ContextAccess.h>
@ -26,4 +27,13 @@ BlockIO InterpreterDropNamedCollectionQuery::execute()
return {};
}
void registerInterpreterDropNamedCollectionQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterDropNamedCollectionQuery>(args.query, args.context);
};
factory.registerInterpreter("InterpreterDropNamedCollectionQuery", create_fn);
}
}

Some files were not shown because too many files have changed in this diff.